From 2b7b46454bb6480e60656a1827ffd525055d1f49 Mon Sep 17 00:00:00 2001 From: Alistair Low Date: Sat, 18 Nov 2017 12:53:50 +0000 Subject: [PATCH 001/629] Added a check for a macro to specify that an ARM device is not a mobile platform - copying the existing mechanism for Raspberry Pi. --- tensorflow/core/platform/platform.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/platform/platform.h b/tensorflow/core/platform/platform.h index 12120c4ab9..0481b36871 100644 --- a/tensorflow/core/platform/platform.h +++ b/tensorflow/core/platform/platform.h @@ -43,10 +43,11 @@ limitations under the License. #elif defined(__arm__) #define PLATFORM_POSIX -// Require an outside macro to tell us if we're building for Raspberry Pi. -#if !defined(RASPBERRY_PI) +// Require an outside macro to tell us if we're building for Raspberry Pi or +// another ARM device that's not a mobile platform. +#if !defined(RASPBERRY_PI) && !defined(ARM_NON_MOBILE) #define IS_MOBILE_PLATFORM -#endif // !defined(RASPBERRY_PI) +#endif // !defined(RASPBERRY_PI) && !defined(ARM_NON_MOBILE) #else // If no platform specified, use: -- GitLab From b598316d1c96528a4912dda2053fafd3dec38869 Mon Sep 17 00:00:00 2001 From: John Sungjin Park Date: Fri, 1 Dec 2017 08:49:57 +0900 Subject: [PATCH 002/629] Update freeze_graph.py --- tensorflow/python/tools/freeze_graph.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py index 0ddf09260b..62c6d8e35b 100644 --- a/tensorflow/python/tools/freeze_graph.py +++ b/tensorflow/python/tools/freeze_graph.py @@ -107,7 +107,7 @@ def freeze_graph_with_def_protos(input_graph_def, input_meta_graph_def, clear_devices=True) restorer.restore(sess, input_checkpoint) if initializer_nodes: - sess.run(initializer_nodes.split(",")) + sess.run(initializer_nodes.replace(' ','').split(",")) elif input_saved_model_dir: if saved_model_tags is None: saved_model_tags = [] @@ -127,25 +127,25 @@ def freeze_graph_with_def_protos(input_graph_def, saver = saver_lib.Saver(var_list=var_list) saver.restore(sess, input_checkpoint) if initializer_nodes: - sess.run(initializer_nodes.split(",")) + sess.run(initializer_nodes.replace(' ','').split(",")) - variable_names_whitelist = (variable_names_whitelist.split(",") + variable_names_whitelist = (variable_names_whitelist.replace(' ','').split(",") if variable_names_whitelist else None) - variable_names_blacklist = (variable_names_blacklist.split(",") + variable_names_blacklist = (variable_names_blacklist.replace(' ','').split(",") if variable_names_blacklist else None) if input_meta_graph_def: output_graph_def = graph_util.convert_variables_to_constants( sess, input_meta_graph_def.graph_def, - output_node_names.split(","), + output_node_names.replace(' ','').split(","), variable_names_whitelist=variable_names_whitelist, variable_names_blacklist=variable_names_blacklist) else: output_graph_def = graph_util.convert_variables_to_constants( sess, input_graph_def, - output_node_names.split(","), + output_node_names.replace(' ','').split(","), variable_names_whitelist=variable_names_whitelist, variable_names_blacklist=variable_names_blacklist) @@ -236,7 +236,7 @@ def freeze_graph(input_graph, input_graph_def, input_saver_def, input_checkpoint, output_node_names, restore_op_name, filename_tensor_name, output_graph, clear_devices, initializer_nodes, variable_names_whitelist, variable_names_blacklist, - input_meta_graph_def, input_saved_model_dir, saved_model_tags.split(",")) + input_meta_graph_def, input_saved_model_dir, saved_model_tags.replace(' ','').split(",")) def main(unused_args): -- GitLab From e23f2bb8caf56c5fc66394cd081eb717f5626b57 Mon Sep 17 00:00:00 2001 From: Codrut Grosu Date: Tue, 12 Dec 2017 23:46:19 +0200 Subject: [PATCH 003/629] Update the description of the fused parameter. Update the description to reflect the fact that fused=None is equivalent to fused=True. This applies to layers.batch_normalization and contrib.layers.python.layers.batch_norm. --- tensorflow/contrib/layers/python/layers/layers.py | 4 ++-- tensorflow/python/layers/normalization.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 0d25a09852..1ec6a6764b 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -535,8 +535,8 @@ def batch_norm(inputs, then the batch normalization uses weighted mean and variance. (This can be used to correct for bias in training example selection.) - fused: if `True`, use a faster, fused implementation if possible. - If `None`, use the system recommended implementation. + fused: if `None` or `True`, use a faster, fused implementation if possible. + If `False`, use the system recommended implementation. data_format: A string. `NHWC` (default) and `NCHW` are supported. zero_debias_moving_mean: Use zero_debias for moving_mean. It creates a new pair of variables 'moving_mean/biased' and 'moving_mean/local_step'. diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 65e67dd016..de3875c7c6 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -92,8 +92,8 @@ class BatchNormalization(base.Layer): and should be neither too small (which would add noise) nor too large (which would give stale estimates). Note that `momentum` is still applied to get the means and variances for inference. - fused: if `True`, use a faster, fused implementation if possible. - If `None`, use the system recommended implementation. + fused: if `None` or `True`, use a faster, fused implementation if possible. + If `False`, use the system recommended implementation. trainable: Boolean, if `True` also add variables to the graph collection `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable). virtual_batch_size: An `int`. By default, `virtual_batch_size` is `None`, @@ -719,8 +719,8 @@ def batch_normalization(inputs, and should be neither too small (which would add noise) nor too large (which would give stale estimates). Note that `momentum` is still applied to get the means and variances for inference. - fused: if `True`, use a faster, fused implementation if possible. - If `None`, use the system recommended implementation. + fused: if `None` or `True`, use a faster, fused implementation if possible. + If `False`, use the system recommended implementation. virtual_batch_size: An `int`. By default, `virtual_batch_size` is `None`, which means batch normalization is performed across the whole batch. When `virtual_batch_size` is not `None`, instead perform "Ghost Batch -- GitLab From 81c1568b3db39937295530a154d47512402ea684 Mon Sep 17 00:00:00 2001 From: gdh1995 Date: Thu, 14 Dec 2017 20:26:54 +0800 Subject: [PATCH 004/629] fix that remove_nodes drops input suffixes --- tensorflow/tools/graph_transforms/remove_nodes.cc | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/remove_nodes.cc b/tensorflow/tools/graph_transforms/remove_nodes.cc index 119b44d6a4..05f036a86a 100644 --- a/tensorflow/tools/graph_transforms/remove_nodes.cc +++ b/tensorflow/tools/graph_transforms/remove_nodes.cc @@ -81,7 +81,17 @@ Status RemoveNodes(const GraphDef& input_graph_def, return Status::OK(); } const NodeDef& input_node = match.inputs[0].node; - inputs_to_rename[replace_node.name()] = input_node.name(); + string target_name = input_node.name(); + for (const string& input : replace_node.input()) { + if (!input.compare(0, target_name.size(), target_name)) { + if (input.size() == target_name.size() || + input[target_name.size()] == ':') { + target_name = input; + break; + } + } + } + inputs_to_rename[replace_node.name()] = target_name; inputs_to_rename["^" + replace_node.name()] = "^" + input_node.name(); new_nodes->push_back(input_node); -- GitLab From 3a7b31d735340fcf289d580ab7b9a780b18d622b Mon Sep 17 00:00:00 2001 From: JoshVarty Date: Fri, 29 Dec 2017 21:12:40 +0000 Subject: [PATCH 005/629] Initial 4-D support in flip/rotate/transpose --- tensorflow/python/ops/image_ops_impl.py | 149 ++++++++++++++++++------ 1 file changed, 114 insertions(+), 35 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 21561f3689..83e1de6942 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -262,96 +262,168 @@ def random_flip_left_right(image, seed=None): def flip_left_right(image): """Flip an image horizontally (left to right). - Outputs the contents of `image` flipped along the second dimension, which is - `width`. + Outputs the contents of `image` flipped along the width dimension. See also `reverse()`. Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. Returns: - A 3-D tensor of the same type and shape as `image`. + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. """ image = ops.convert_to_tensor(image, name='image') image = control_flow_ops.with_dependencies( - _Check3DImage(image, require_static=False), image) - return fix_image_flip_shape(image, array_ops.reverse(image, [1])) + _CheckAtLeast3DImage(image, require_static=False), image) + + shape = image.get_shape() + if shape.ndims == 3 or shape.ndims is None: + return fix_image_flip_shape(image, array_ops.reverse(image, [1])) + elif shape.ndims == 4: + return array_ops.reverse(image, [2]) + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') def flip_up_down(image): """Flip an image vertically (upside down). - Outputs the contents of `image` flipped along the first dimension, which is - `height`. + Outputs the contents of `image` flipped along the height dimension. See also `reverse()`. Args: - image: A 3-D tensor of shape `[height, width, channels].` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. Returns: - A 3-D tensor of the same type and shape as `image`. + A tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. """ image = ops.convert_to_tensor(image, name='image') image = control_flow_ops.with_dependencies( - _Check3DImage(image, require_static=False), image) - return fix_image_flip_shape(image, array_ops.reverse(image, [0])) + _CheckAtLeast3DImage(image, require_static=False), image) + shape = image.get_shape() + if shape.ndims == 3 or shape.ndims is None: + return fix_image_flip_shape(image, array_ops.reverse(image, [0])) + elif shape.ndims == 4: + return array_ops.reverse(image, [1]) + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') + def rot90(image, k=1, name=None): - """Rotate an image counter-clockwise by 90 degrees. + """Rotate image(s) counter-clockwise by 90 degrees. Args: - image: A 3-D tensor of shape `[height, width, channels]`. + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. k: A scalar integer. The number of times the image is rotated by 90 degrees. name: A name for this operation (optional). Returns: - A rotated 3-D tensor of the same type and shape as `image`. + A rotated of the same type and shape as `image`. + + Raises: + ValueError: if the shape of `image` not supported. """ with ops.name_scope(name, 'rot90', [image, k]) as scope: image = ops.convert_to_tensor(image, name='image') image = control_flow_ops.with_dependencies( - _Check3DImage(image, require_static=False), image) + _CheckAtLeast3DImage(image, require_static=False), image) k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k') k.get_shape().assert_has_rank(0) k = math_ops.mod(k, 4) - def _rot90(): - return array_ops.transpose(array_ops.reverse_v2(image, [1]), - [1, 0, 2]) - def _rot180(): - return array_ops.reverse_v2(image, [0, 1]) - def _rot270(): - return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), - [1]) - cases = [(math_ops.equal(k, 1), _rot90), - (math_ops.equal(k, 2), _rot180), - (math_ops.equal(k, 3), _rot270)] + shape = image.get_shape() + if shape.ndims == 3 or shape.ndims is None: + return _rot90_3D(image, k, scope) + elif shape.ndims == 4: + pass + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') + - ret = control_flow_ops.case(cases, default=lambda: image, exclusive=True, - name=scope) - ret.set_shape([None, None, image.get_shape()[2]]) - return ret +def _rot90_3D(image, k, scope): + """Rotate image counter-clockwise by 90 degrees `k` times. + Args: + image: 3-D Tensor of shape `[height, width, channels]`. + k: A scalar integer. The number of times the image is rotated by 90 degrees. + + Returns: + A 3-D tensor of the same type and shape as `image`. + + """ + def _rot90(): + return array_ops.transpose(array_ops.reverse_v2(image, [1]), + [1, 0, 2]) + def _rot180(): + return array_ops.reverse_v2(image, [0, 1]) + def _rot270(): + return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), + [1]) + cases = [(math_ops.equal(k, 1), _rot90), + (math_ops.equal(k, 2), _rot180), + (math_ops.equal(k, 3), _rot270)] + + result = control_flow_ops.case(cases, default=lambda: image, exclusive=True, + name=scope) + result.set_shape([None, None, image.get_shape()[2]]) + return result + +def _rot90_4D(images, k, name_scope): + """Rotate batch of images counter-clockwise by 90 degrees `k` times. + + Args: + images: 4-D Tensor of shape `[height, width, channels]`. + k: A scalar integer. The number of times the images are rotated by 90 degrees. + name_scope: A valid TensorFlow name scope. + + Returns: + A 4-D tensor of the same type and shape as `images`. + + """ + def _rot90(): + return array_ops.transpose(array_ops.reverse_v2(images, [2]), + [0, 2, 1, 3]) + def _rot180(): + return array_ops.reverse_v2(images, [1, 2]) + def _rot270(): + return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), + [2]) + + cases = [(math_ops.equal(k, 1), _rot90), + (math_ops.equal(k, 2), _rot180), + (math_ops.equal(k, 3), _rot270)] + + result = control_flow_ops.case(cases, default=lambda: images, exclusive=True, + name=scope) + shape = result.get_shape() + result.set_shape([shape[0], None, None, shape[3]]) + return result def transpose_image(image): - """Transpose an image by swapping the first and second dimension. + """Transpose image(s) by swapping the height and width dimension. See also `transpose()`. Args: - image: 3-D tensor of shape `[height, width, channels]` + image: 4-D Tensor of shape `[batch, height, width, channels]` or + 3-D Tensor of shape `[height, width, channels]`. Returns: - A 3-D tensor of shape `[width, height, channels]` + If `image` was 4-D, a 4-D float Tensor of shape + `[batch, width, height, channels]` + If `image` was 3-D, a 3-D float Tensor of shape + `[width, height, channels]` Raises: ValueError: if the shape of `image` not supported. @@ -359,7 +431,14 @@ def transpose_image(image): image = ops.convert_to_tensor(image, name='image') image = control_flow_ops.with_dependencies( _Check3DImage(image, require_static=False), image) - return array_ops.transpose(image, [1, 0, 2], name='transpose_image') + + shape = image.get_shape() + if shape.ndims == 3 or shape.ndims is None: + return array_ops.transpose(image, [1, 0, 2], name='transpose_image') + elif shape.ndims == 4: + return array_ops.transpose(image, [0, 2, 1, 3], name='transpose_image') + else: + raise ValueError('\'image\' must have either 3 or 4 dimensions.') def central_crop(image, central_fraction): -- GitLab From 02f31ec80d6d302842d079778924ced788dfcbc5 Mon Sep 17 00:00:00 2001 From: JoshVarty Date: Fri, 29 Dec 2017 22:32:24 +0000 Subject: [PATCH 006/629] Implement tests --- tensorflow/python/ops/image_ops_impl.py | 11 +- tensorflow/python/ops/image_ops_test.py | 135 +++++++++++++++++++++--- 2 files changed, 126 insertions(+), 20 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 83e1de6942..cf21609a55 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -346,17 +346,18 @@ def rot90(image, k=1, name=None): if shape.ndims == 3 or shape.ndims is None: return _rot90_3D(image, k, scope) elif shape.ndims == 4: - pass + return _rot90_4D(image, k, scope) else: raise ValueError('\'image\' must have either 3 or 4 dimensions.') -def _rot90_3D(image, k, scope): +def _rot90_3D(image, k, name_scope): """Rotate image counter-clockwise by 90 degrees `k` times. Args: image: 3-D Tensor of shape `[height, width, channels]`. k: A scalar integer. The number of times the image is rotated by 90 degrees. + name_scope: A valid TensorFlow name scope. Returns: A 3-D tensor of the same type and shape as `image`. @@ -375,7 +376,7 @@ def _rot90_3D(image, k, scope): (math_ops.equal(k, 3), _rot270)] result = control_flow_ops.case(cases, default=lambda: image, exclusive=True, - name=scope) + name=name_scope) result.set_shape([None, None, image.get_shape()[2]]) return result @@ -405,7 +406,7 @@ def _rot90_4D(images, k, name_scope): (math_ops.equal(k, 3), _rot270)] result = control_flow_ops.case(cases, default=lambda: images, exclusive=True, - name=scope) + name=name_scope) shape = result.get_shape() result.set_shape([shape[0], None, None, shape[3]]) return result @@ -430,7 +431,7 @@ def transpose_image(image): """ image = ops.convert_to_tensor(image, name='image') image = control_flow_ops.with_dependencies( - _Check3DImage(image, require_static=False), image) + _CheckAtLeast3DImage(image, require_static=False), image) shape = image.get_shape() if shape.ndims == 3 or shape.ndims is None: diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 4af9bd2a00..4b851dda72 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -842,7 +842,7 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase): class FlipTransposeRotateTest(test_util.TensorFlowTestCase): - def testIdempotentLeftRight(self): + def testInvolutionLeftRight(self): x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) @@ -850,6 +850,15 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_tf = y.eval() self.assertAllEqual(y_tf, x_np) + def testInvolutionLeftRightWithBatch(self): + x_np = np.array([[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]], + dtype=np.uint8).reshape([2, 2, 3, 1]) + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.flip_left_right(image_ops.flip_left_right(x_tf)) + y_tf = y.eval() + self.assertAllEqual(y_tf, x_np) + def testLeftRight(self): x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) y_np = np.array([[3, 2, 1], [3, 2, 1]], dtype=np.uint8).reshape([2, 3, 1]) @@ -860,9 +869,23 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_tf = y.eval() self.assertAllEqual(y_tf, y_np) + def testLeftRightWithBatch(self): + x_np = np.array([[[1, 2, 3], [1, 2, 3]], [[1, 2, 3], [1, 2, 3]]], + dtype=np.uint8).reshape([2, 2, 3, 1]) + y_np = np.array([[[3, 2, 1], [3, 2, 1]], [[3, 2, 1], [3, 2, 1]]], + dtype=np.uint8).reshape([2, 2, 3, 1]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.flip_left_right(x_tf) + y_tf = y.eval() + self.assertAllEqual(y_tf, y_np) + + def testRandomFlipLeftRight(self): x_np = np.array([[1, 2, 3], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) y_np = np.array([[3, 2, 1], [3, 2, 1]], dtype=np.uint8).reshape([2, 3, 1]) + seed = 42 with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) @@ -870,7 +893,7 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): count_flipped = 0 count_unflipped = 0 - for _ in range(50): + for _ in range(100): y_tf = y.eval() if y_tf[0][0] == 1: self.assertAllEqual(y_tf, x_np) @@ -878,10 +901,15 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): else: self.assertAllEqual(y_tf, y_np) count_flipped += 1 - self.assertGreaterEqual(count_flipped, 1) - self.assertGreaterEqual(count_unflipped, 1) - def testIdempotentUpDown(self): + # 100 trials + # Mean: 50 + # Std Dev: ~5 + # Six Sigma: 50 - (5 * 6) = 20 + self.assertGreaterEqual(count_flipped, 20) + self.assertGreaterEqual(count_unflipped, 20) + + def testInvolutionUpDown(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) with self.test_session(use_gpu=True): @@ -890,6 +918,16 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_tf = y.eval() self.assertAllEqual(y_tf, x_np) + def testInvolutionUpDownWithBatch(self): + x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], + dtype=np.uint8).reshape([2, 2, 3, 1]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.flip_up_down(image_ops.flip_up_down(x_tf)) + y_tf = y.eval() + self.assertAllEqual(y_tf, x_np) + def testUpDown(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) @@ -900,16 +938,28 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_tf = y.eval() self.assertAllEqual(y_tf, y_np) + def testUpDownWithBatch(self): + x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], + dtype=np.uint8).reshape([2, 2, 3, 1]) + y_np = np.array([[[4, 5, 6], [1, 2, 3]], [[10, 11, 12], [7, 8, 9]]], + dtype=np.uint8).reshape([2, 2, 3, 1]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.flip_up_down(x_tf) + y_tf = y.eval() + self.assertAllEqual(y_tf, y_np) + def testRandomFlipUpDown(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) y_np = np.array([[4, 5, 6], [1, 2, 3]], dtype=np.uint8).reshape([2, 3, 1]) with self.test_session(use_gpu=True): x_tf = constant_op.constant(x_np, shape=x_np.shape) - y = image_ops.random_flip_up_down(x_tf) + y = image_ops.random_flip_up_down(x_tf, seed=42) count_flipped = 0 count_unflipped = 0 - for _ in range(50): + for _ in range(100): y_tf = y.eval() if y_tf[0][0] == 1: self.assertAllEqual(y_tf, x_np) @@ -917,10 +967,15 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): else: self.assertAllEqual(y_tf, y_np) count_flipped += 1 - self.assertGreaterEqual(count_flipped, 1) - self.assertGreaterEqual(count_unflipped, 1) - def testIdempotentTranspose(self): + # 100 trials + # Mean: 50 + # Std Dev: ~5 + # Six Sigma: 50 - (5 * 6) = 20 + self.assertGreaterEqual(count_flipped, 20) + self.assertGreaterEqual(count_unflipped, 20) + + def testInvolutionTranspose(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) with self.test_session(use_gpu=True): @@ -929,6 +984,16 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_tf = y.eval() self.assertAllEqual(y_tf, x_np) + def testInvolutionTransposeWithBatch(self): + x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], + dtype=np.uint8).reshape([2, 2, 3, 1]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.transpose_image(image_ops.transpose_image(x_tf)) + y_tf = y.eval() + self.assertAllEqual(y_tf, x_np) + def testTranspose(self): x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint8).reshape([2, 3, 1]) y_np = np.array([[1, 4], [2, 5], [3, 6]], dtype=np.uint8).reshape([3, 2, 1]) @@ -939,28 +1004,52 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_tf = y.eval() self.assertAllEqual(y_tf, y_np) + def testTransposeWithBatch(self): + x_np = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], + dtype=np.uint8).reshape([2, 2, 3, 1]) + + y_np = np.array([[[1, 4], [2, 5], [3, 6]], [[7, 10], [8, 11], [9, 12]]], + dtype=np.uint8).reshape([2, 3, 2, 1]) + + with self.test_session(use_gpu=True): + x_tf = constant_op.constant(x_np, shape=x_np.shape) + y = image_ops.transpose_image(x_tf) + y_tf = y.eval() + self.assertAllEqual(y_tf, y_np) + def testPartialShapes(self): p_unknown_rank = array_ops.placeholder(dtypes.uint8) - p_unknown_dims = array_ops.placeholder( + p_unknown_dims_3 = array_ops.placeholder( dtypes.uint8, shape=[None, None, None]) + p_unknown_dims_4 = array_ops.placeholder( + dtypes.uint8, shape=[None, None, None, None]) p_unknown_width = array_ops.placeholder(dtypes.uint8, shape=[64, None, 3]) + p_unknown_batch = array_ops.placeholder(dtypes.uint8, + shape=[None, 64, 64, 3]) p_wrong_rank = array_ops.placeholder(dtypes.uint8, shape=[None, None]) p_zero_dim = array_ops.placeholder(dtypes.uint8, shape=[64, 0, 3]) for op in [ image_ops.flip_left_right, image_ops.flip_up_down, - image_ops.random_flip_left_right, image_ops.random_flip_up_down, + #TODO: Fix after converting all methods + #image_ops.random_flip_left_right, image_ops.random_flip_up_down, image_ops.transpose_image, image_ops.rot90 ]: transformed_unknown_rank = op(p_unknown_rank) self.assertEqual(3, transformed_unknown_rank.get_shape().ndims) - transformed_unknown_dims = op(p_unknown_dims) - self.assertEqual(3, transformed_unknown_dims.get_shape().ndims) + transformed_unknown_dims_3 = op(p_unknown_dims_3) + self.assertEqual(3, transformed_unknown_dims_3.get_shape().ndims) + transformed_unknown_dims_4 = op(p_unknown_dims_4) + self.assertEqual(4, transformed_unknown_dims_4.get_shape().ndims) + transformed_unknown_width = op(p_unknown_width) self.assertEqual(3, transformed_unknown_width.get_shape().ndims) + transformed_unknown_batch = op(p_unknown_batch) + self.assertEqual(4, transformed_unknown_batch.get_shape().ndims) - with self.assertRaisesRegexp(ValueError, "must be three-dimensional"): + with self.assertRaisesRegexp(ValueError, + "must be at least three-dimensional"): op(p_wrong_rank) with self.assertRaisesRegexp(ValueError, "must be > 0"): op(p_zero_dim) @@ -973,6 +1062,14 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): rotated = image_ops.rot90(rotated) self.assertAllEqual(image, rotated.eval()) + def testRot90GroupOrderWithBatch(self): + image = np.arange(48, dtype=np.uint8).reshape([2, 2, 4, 3]) + with self.test_session(use_gpu=True): + rotated = image + for _ in xrange(4): + rotated = image_ops.rot90(rotated) + self.assertAllEqual(image, rotated.eval()) + def testRot90NumpyEquivalence(self): image = np.arange(24, dtype=np.uint8).reshape([2, 4, 3]) with self.test_session(use_gpu=True): @@ -982,6 +1079,14 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): y_np = np.rot90(image, k=k) self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k})) + def testRot90NumpyEquivalenceWithBatch(self): + image = np.arange(48, dtype=np.uint8).reshape([2, 2, 4, 3]) + with self.test_session(use_gpu=True): + k_placeholder = array_ops.placeholder(dtypes.int32, shape=[]) + y_tf = image_ops.rot90(image, k_placeholder) + for k in xrange(4): + y_np = np.rot90(image, k=k, axes=(1, 2)) + self.assertAllEqual(y_np, y_tf.eval({k_placeholder: k})) class RandomFlipTest(test_util.TensorFlowTestCase): -- GitLab From 64960170768fd4529640c424d9d834097f737876 Mon Sep 17 00:00:00 2001 From: JoshVarty Date: Sat, 30 Dec 2017 05:16:21 +0000 Subject: [PATCH 007/629] Make sure random ops are tested properly --- tensorflow/python/ops/image_ops_test.py | 28 ++++++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 4b851dda72..feec7bc0c3 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -1030,29 +1030,41 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): p_wrong_rank = array_ops.placeholder(dtypes.uint8, shape=[None, None]) p_zero_dim = array_ops.placeholder(dtypes.uint8, shape=[64, 0, 3]) + #Ops that support 3D input for op in [ image_ops.flip_left_right, image_ops.flip_up_down, - #TODO: Fix after converting all methods - #image_ops.random_flip_left_right, image_ops.random_flip_up_down, + image_ops.random_flip_left_right, image_ops.random_flip_up_down, image_ops.transpose_image, image_ops.rot90 ]: transformed_unknown_rank = op(p_unknown_rank) self.assertEqual(3, transformed_unknown_rank.get_shape().ndims) transformed_unknown_dims_3 = op(p_unknown_dims_3) self.assertEqual(3, transformed_unknown_dims_3.get_shape().ndims) - transformed_unknown_dims_4 = op(p_unknown_dims_4) - self.assertEqual(4, transformed_unknown_dims_4.get_shape().ndims) - transformed_unknown_width = op(p_unknown_width) self.assertEqual(3, transformed_unknown_width.get_shape().ndims) + + with self.assertRaisesRegexp(ValueError, "must be > 0"): + op(p_zero_dim) + + #Ops that support 4D input + for op in [ + image_ops.flip_left_right, image_ops.flip_up_down, + image_ops.transpose_image, image_ops.rot90 + ]: + transformed_unknown_dims_4 = op(p_unknown_dims_4) + self.assertEqual(4, transformed_unknown_dims_4.get_shape().ndims) transformed_unknown_batch = op(p_unknown_batch) self.assertEqual(4, transformed_unknown_batch.get_shape().ndims) - with self.assertRaisesRegexp(ValueError, "must be at least three-dimensional"): op(p_wrong_rank) - with self.assertRaisesRegexp(ValueError, "must be > 0"): - op(p_zero_dim) + + for op in [ + image_ops.random_flip_left_right, image_ops.random_flip_up_down, + ]: + with self.assertRaisesRegexp(ValueError, "must be three-dimensional"): + op(p_wrong_rank) + def testRot90GroupOrder(self): image = np.arange(24, dtype=np.uint8).reshape([2, 4, 3]) -- GitLab From b54e237977fa695d4f81bae60dc789320be4911c Mon Sep 17 00:00:00 2001 From: JoshVarty Date: Sat, 30 Dec 2017 06:52:52 +0000 Subject: [PATCH 008/629] Correct comment --- tensorflow/python/ops/image_ops_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index cf21609a55..414b344d99 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -329,7 +329,7 @@ def rot90(image, k=1, name=None): name: A name for this operation (optional). Returns: - A rotated of the same type and shape as `image`. + A rotated tensor of the same type and shape as `image`. Raises: ValueError: if the shape of `image` not supported. -- GitLab From 81ec5d20935352d71ff56fac06c36d6ff0a7ae05 Mon Sep 17 00:00:00 2001 From: Kamil Sindi Date: Thu, 4 Jan 2018 13:13:38 -0500 Subject: [PATCH 009/629] Export inception model after retrain --- .../examples/image_retraining/retrain.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index ec22684eaf..99ee56d2b0 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -96,6 +96,10 @@ Visualize the summaries with this command: tensorboard --logdir /tmp/retrain_logs +To use with Tensorflow Serving: + +tensorflow_model_server --port=9000 --model_name=inception --model_base_path=/tmp/saved_models/ + """ from __future__ import absolute_import from __future__ import division @@ -1004,6 +1008,38 @@ def add_jpeg_decoding(input_width, input_height, input_depth, input_mean, return jpeg_data, mul_image +def export_model(sess, architecture, saved_model_dir): + if architecture == 'inception_v3': + input_tensor = 'DecodeJpeg/contents:0' + elif architecture.startswith('mobilenet_'): + input_tensor = 'input:0' + else: + raise ValueError('Unknown architecture', architecture) + in_image = sess.graph.get_tensor_by_name(input_tensor) + inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} + + out_classes = sess.graph.get_tensor_by_name('final_result:0') + outputs = {'prediction': tf.saved_model.utils.build_tensor_info(out_classes)} + + signature = tf.saved_model.signature_def_utils.build_signature_def( + inputs=inputs, + outputs=outputs, + method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME + ) + + legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') + + # Save out the SavedModel. + builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) + builder.add_meta_graph_and_variables( + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature + }, + legacy_init_op=legacy_init_op) + builder.save() + + def main(_): # Needed to make sure the logging output is visible. # See https://github.com/tensorflow/tensorflow/issues/3047 @@ -1179,6 +1215,8 @@ def main(_): with gfile.FastGFile(FLAGS.output_labels, 'w') as f: f.write('\n'.join(image_lists.keys()) + '\n') + export_model(sess, FLAGS.architecture, FLAGS.saved_model_dir) + if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -1362,5 +1400,11 @@ if __name__ == '__main__': takes 128x128 images. See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html for more information on Mobilenet.\ """) + parser.add_argument( + '--saved_model_dir', + type=str, + default='/tmp/saved_models/1/', + help='Where to save the exported graph.' + ) FLAGS, unparsed = parser.parse_known_args() tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) -- GitLab From ad8d437b32fef6f0941d3834af5c78e971be0a34 Mon Sep 17 00:00:00 2001 From: Kamil Sindi Date: Thu, 4 Jan 2018 15:58:34 -0500 Subject: [PATCH 010/629] Add docstring to export_model --- tensorflow/examples/image_retraining/retrain.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index 99ee56d2b0..fb21ccbbb7 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -1009,6 +1009,13 @@ def add_jpeg_decoding(input_width, input_height, input_depth, input_mean, def export_model(sess, architecture, saved_model_dir): + """Exports model for serving. + + Args: + sess: Current active TensorFlow Session. + architecture: Model architecture. + saved_model_dir: Directory in which to save exported model and variables. + """ if architecture == 'inception_v3': input_tensor = 'DecodeJpeg/contents:0' elif architecture.startswith('mobilenet_'): -- GitLab From 14a8e6d68e358d58e21a9b6edd26bb105258281a Mon Sep 17 00:00:00 2001 From: tkunic Date: Wed, 3 Jan 2018 21:51:13 +0100 Subject: [PATCH 011/629] Move SpeechActivity animation to XML. --- .../android/res/animator/color_animation.xml | 30 +++++++++++++++++++ .../org/tensorflow/demo/SpeechActivity.java | 19 +++++------- 2 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 tensorflow/examples/android/res/animator/color_animation.xml diff --git a/tensorflow/examples/android/res/animator/color_animation.xml b/tensorflow/examples/android/res/animator/color_animation.xml new file mode 100644 index 0000000000..891d8cc1d4 --- /dev/null +++ b/tensorflow/examples/android/res/animator/color_animation.xml @@ -0,0 +1,30 @@ + + + + + diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java index 184df1bdb4..8a1d86d9ee 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/SpeechActivity.java @@ -31,7 +31,8 @@ the RecognizeCommands helper class. package org.tensorflow.demo; -import android.animation.ValueAnimator; +import android.animation.AnimatorInflater; +import android.animation.AnimatorSet; import android.app.Activity; import android.content.pm.PackageManager; import android.media.AudioFormat; @@ -329,17 +330,11 @@ public class SpeechActivity extends Activity { labelIndex = i; } } - final View labelView = (View) labelsListView.getChildAt(labelIndex - 2); - ValueAnimator colorAnimation = - ValueAnimator.ofArgb(0x00b3ccff, 0xffb3ccff, 0x00b3ccff); - colorAnimation.setDuration(750); - colorAnimation.addUpdateListener( - new ValueAnimator.AnimatorUpdateListener() { - @Override - public void onAnimationUpdate(ValueAnimator animator) { - labelView.setBackgroundColor((int) animator.getAnimatedValue()); - } - }); + final View labelView = labelsListView.getChildAt(labelIndex - 2); + + AnimatorSet colorAnimation = (AnimatorSet) AnimatorInflater.loadAnimator( + SpeechActivity.this, R.animator.color_animation); + colorAnimation.setTarget(labelView); colorAnimation.start(); } } -- GitLab From c36fdd1cf548e5a3d7854edbc217f93eee9d9b29 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 12 Jan 2018 22:16:07 +0000 Subject: [PATCH 012/629] Add unspecified dimensions (-1) support for noise_shape with tf.nn.dropout This fix tries to address the issue raised in 16034 where it was not possible to have unspecified dimensions for `noise_shape` with `tf.nn.dropout`. This fix adds the support so that it is possible to specify `noise_shape = [-1, 1, 1, -1]` instead of `noise_shape = [k, 1, 1, n]`. This fix fixes 16034. Signed-off-by: Yong Tang --- tensorflow/python/ops/nn_ops.py | 34 ++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index a9d57889c2..95a24ecc6d 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2193,7 +2193,39 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: di if tensor_util.constant_value(keep_prob) == 1: return x - noise_shape = noise_shape if noise_shape is not None else array_ops.shape(x) + def noise_shape_func(x, noise_shape): + # If noise_shape is none return immediately. + if noise_shape is None: + return array_ops.shape(x) + + if isinstance(noise_shape, (tuple, list)) and not noise_shape: + dtype = dtypes.int32 + else: + dtype = None + noise_shape_ = ops.convert_to_tensor( + noise_shape, + dtype=dtype, + name="noise_shape") + + # Use constant_value (vs. constant_value_as_shape) as we need + # the distinction between "unknown" and explicit "not use" (`-1`). + # If noise_shape cannot be converted to a const tensor, + # then no need to check the value and leave it to rest of the logic. + noise_shape_ = tensor_util.constant_value(noise_shape_) + if noise_shape_ is None: + return noise_shape + + # Replace `-1` with shape in x + if noise_shape_ is not None and sum(noise_shape_ == -1) > 0: + if x.shape.dims is not None and len(x.shape.dims) == len(noise_shape_): + for i, dim in enumerate(x.shape.dims): + if noise_shape_[i] == -1 and dim.value is not None: + noise_shape_[i] = dim.value + + return noise_shape_ + + noise_shape = noise_shape_func(x, noise_shape) + # uniform [keep_prob, 1.0 + keep_prob) random_tensor = keep_prob random_tensor += random_ops.random_uniform(noise_shape, -- GitLab From 63db01abde2ad4977587591b1646f40b175f7faa Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 12 Jan 2018 22:19:09 +0000 Subject: [PATCH 013/629] Add test case for unspecified dimensions support of noise_shape in tf.nn.dropout Signed-off-by: Yong Tang --- tensorflow/python/ops/nn_test.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index e8c24643a7..9a3bccc4c0 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -360,6 +360,31 @@ class DropoutTest(test_lib.TestCase): x, keep_prob, noise_shape=array_ops.placeholder(dtypes.int32)) self.assertEqual(x.get_shape(), dropout_x.get_shape()) + def testPartialShapedDropout(self): + x_dim = 40 * 30 + y_dim = 3 + num_iter = 10 + for keep_prob in [0.1, 0.5, 0.8]: + with self.test_session(): + t = constant_op.constant( + 1.0, shape=[x_dim, y_dim], dtype=dtypes.float32) + # Set noise_shape=[-1, 1] which means [x_dim, 1]. + dropout = nn_ops.dropout(t, keep_prob, noise_shape=[-1, 1]) + self.assertEqual([x_dim, y_dim], dropout.get_shape()) + final_count = 0 + for _ in xrange(0, num_iter): + value = dropout.eval() + final_count += np.count_nonzero(value) + # Verifies that there are only two values: 0 and 1/keep_prob. + sorted_value = np.unique(np.sort(value)) + self.assertEqual(0, sorted_value[0]) + self.assertAllClose(1 / keep_prob, sorted_value[1]) + # Check that we are in the 15% error range + expected_count = x_dim * y_dim * keep_prob * num_iter + rel_error = math.fabs(final_count - expected_count) / expected_count + print(rel_error) + self.assertTrue(rel_error < 0.15) + def testInvalidKeepProb(self): x_dim = 40 y_dim = 30 -- GitLab From ae5e7ffeb7f5144f66350624c75fe3dfd0ecd8ce Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 24 Jan 2018 02:47:56 +0000 Subject: [PATCH 014/629] Use None instead of -1 for partial shape Signed-off-by: Yong Tang --- tensorflow/python/ops/nn_ops.py | 38 +++++++++++++------------------- tensorflow/python/ops/nn_test.py | 4 ++-- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 95a24ecc6d..ccba7ab0e0 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2198,31 +2198,23 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: di if noise_shape is None: return array_ops.shape(x) - if isinstance(noise_shape, (tuple, list)) and not noise_shape: - dtype = dtypes.int32 - else: - dtype = None - noise_shape_ = ops.convert_to_tensor( - noise_shape, - dtype=dtype, - name="noise_shape") - - # Use constant_value (vs. constant_value_as_shape) as we need - # the distinction between "unknown" and explicit "not use" (`-1`). - # If noise_shape cannot be converted to a const tensor, - # then no need to check the value and leave it to rest of the logic. - noise_shape_ = tensor_util.constant_value(noise_shape_) - if noise_shape_ is None: - return noise_shape - - # Replace `-1` with shape in x - if noise_shape_ is not None and sum(noise_shape_ == -1) > 0: - if x.shape.dims is not None and len(x.shape.dims) == len(noise_shape_): + try: + # Best effort to figure out the intended shape. + # If not possible, let the op to handle it. + noise_shape_ = tensor_shape.as_shape(noise_shape) + if (x.shape.dims is not None + and len(x.shape.dims) == len(noise_shape_.dims)): + new_dims = [] for i, dim in enumerate(x.shape.dims): - if noise_shape_[i] == -1 and dim.value is not None: - noise_shape_[i] = dim.value + if noise_shape_.dims[i].value is None and dim.value is not None: + new_dims.append(dim.value) + else: + new_dims.append(noise_shape_.dims[i].value) + return tensor_shape.TensorShape(new_dims) + except TypeError: + return noise_shape - return noise_shape_ + return noise_shape noise_shape = noise_shape_func(x, noise_shape) diff --git a/tensorflow/python/ops/nn_test.py b/tensorflow/python/ops/nn_test.py index 9a3bccc4c0..cc074a96e1 100644 --- a/tensorflow/python/ops/nn_test.py +++ b/tensorflow/python/ops/nn_test.py @@ -368,8 +368,8 @@ class DropoutTest(test_lib.TestCase): with self.test_session(): t = constant_op.constant( 1.0, shape=[x_dim, y_dim], dtype=dtypes.float32) - # Set noise_shape=[-1, 1] which means [x_dim, 1]. - dropout = nn_ops.dropout(t, keep_prob, noise_shape=[-1, 1]) + # Set noise_shape=[None, 1] which means [x_dim, 1]. + dropout = nn_ops.dropout(t, keep_prob, noise_shape=[None, 1]) self.assertEqual([x_dim, y_dim], dropout.get_shape()) final_count = 0 for _ in xrange(0, num_iter): -- GitLab From c20bb370a9009d5c20a17eb6cd35dde7eebe206f Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 24 Jan 2018 21:16:52 +0000 Subject: [PATCH 015/629] Correct exception Signed-off-by: Yong Tang --- tensorflow/python/ops/nn_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index ccba7ab0e0..ed894e598c 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2211,7 +2211,7 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: di else: new_dims.append(noise_shape_.dims[i].value) return tensor_shape.TensorShape(new_dims) - except TypeError: + except Exception: return noise_shape return noise_shape -- GitLab From e89b1122f226be0c7aaaa1783f21a7f632754394 Mon Sep 17 00:00:00 2001 From: Jekyll Song Date: Thu, 25 Jan 2018 15:24:21 +0800 Subject: [PATCH 016/629] change from deprecated version to a new version --- tensorflow/examples/get_started/regression/imports85.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/get_started/regression/imports85.py b/tensorflow/examples/get_started/regression/imports85.py index 6bee556eb8..a8e4c782b3 100644 --- a/tensorflow/examples/get_started/regression/imports85.py +++ b/tensorflow/examples/get_started/regression/imports85.py @@ -131,7 +131,7 @@ def dataset(y_name="price", train_fraction=0.7): # booleans but we are dealing with symbolic tensors. return ~in_training_set(line) - base_dataset = (tf.contrib.data + base_dataset = (tf.data # Get the lines from the file. .TextLineDataset(path) # drop lines with question marks. -- GitLab From 111e3b6c2ce3083bcc932fd04505805de552df6c Mon Sep 17 00:00:00 2001 From: Jun Wang Date: Sat, 27 Jan 2018 16:47:42 +0800 Subject: [PATCH 017/629] use gather_nd to _gather_states in LSTMBlockWapper --- tensorflow/contrib/rnn/python/ops/lstm_ops.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py index 04f342cd18..a8d08ac02f 100644 --- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py +++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py @@ -572,9 +572,8 @@ class LSTMBlockWrapper(base_layer.Layer): def _gather_states(self, data, indices, batch_size): """Produce `out`, s.t. out(i, j) = data(indices(i), i, j).""" - mod_indices = indices * batch_size + math_ops.range(batch_size) - return array_ops.gather( - array_ops.reshape(data, [-1, self.num_units]), mod_indices) + return array_ops.gather_nd( + data, array_ops.stack([indices, math_ops.range(batch_size)], axis=1)) class LSTMBlockFusedCell(LSTMBlockWrapper): -- GitLab From b1f63441d223861f3f8aac17f85989604538dec9 Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Sat, 27 Jan 2018 22:54:10 +0800 Subject: [PATCH 018/629] Enable [no]unroll for Clang on Windows --- tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc | 8 ++++---- .../kernels/parameterized_truncated_normal_op_gpu.cu.cc | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc index 126b64f73d..1c6776a3aa 100644 --- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc @@ -24,12 +24,12 @@ limitations under the License. #include "tensorflow/core/util/cuda_kernel_helper.h" #include "tensorflow/core/util/tensor_format.h" -#if !defined(_MSC_VER) -#define UNROLL _Pragma("unroll") -#define NOUNROLL _Pragma("nounroll") -#else +#if defined(_MSC_VER) && !defined(__clang__) #define UNROLL #define NOUNROLL +#else +#define UNROLL _Pragma("unroll") +#define NOUNROLL _Pragma("nounroll") #endif namespace tensorflow { diff --git a/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc b/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc index ddfeb1bb79..661d47d925 100644 --- a/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc +++ b/tensorflow/core/kernels/parameterized_truncated_normal_op_gpu.cu.cc @@ -29,7 +29,7 @@ limitations under the License. #include "tensorflow/core/lib/random/random_distributions.h" #include "tensorflow/core/util/cuda_kernel_helper.h" -#ifdef COMPILER_MSVC +#if defined(_MSC_VER) && !defined(__clang__) // msvc does not support unroll. One could try the loop pragma but we need to // take a closer look if this generates better code in this case. For now let // the compiler take care of it. -- GitLab From d92d3cd5b6c1726f75afed7099339052d1c79dab Mon Sep 17 00:00:00 2001 From: Vijay Vasudevan Date: Mon, 29 Jan 2018 11:40:23 -0800 Subject: [PATCH 019/629] small lint fix --- tensorflow/python/ops/nn_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index ed894e598c..01eebd699f 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2202,8 +2202,8 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: di # Best effort to figure out the intended shape. # If not possible, let the op to handle it. noise_shape_ = tensor_shape.as_shape(noise_shape) - if (x.shape.dims is not None - and len(x.shape.dims) == len(noise_shape_.dims)): + if (x.shape.dims is not None and + len(x.shape.dims) == len(noise_shape_.dims)): new_dims = [] for i, dim in enumerate(x.shape.dims): if noise_shape_.dims[i].value is None and dim.value is not None: -- GitLab From 2b19edf04637ce217c72a5b3cd2c64a9a9dd71e1 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 29 Jan 2018 22:53:23 +0000 Subject: [PATCH 020/629] Move noise_shape function to nn_ops.py so that is could potentially be used by layers. Signed-off-by: Yong Tang --- tensorflow/python/ops/nn_ops.py | 47 ++++++++++++++++----------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 01eebd699f..acbc241859 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2142,6 +2142,28 @@ def xw_plus_b_v1(x, weights, biases, name=None): # pylint: disable=invalid-name mm = math_ops.matmul(x, weights) return bias_add_v1(mm, biases, name=name) +def _get_noise_shape(x, noise_shape): + # If noise_shape is none return immediately. + if noise_shape is None: + return array_ops.shape(x) + + try: + # Best effort to figure out the intended shape. + # If not possible, let the op to handle it. + noise_shape_ = tensor_shape.as_shape(noise_shape) + if (x.shape.dims is not None and + len(x.shape.dims) == len(noise_shape_.dims)): + new_dims = [] + for i, dim in enumerate(x.shape.dims): + if noise_shape_.dims[i].value is None and dim.value is not None: + new_dims.append(dim.value) + else: + new_dims.append(noise_shape_.dims[i].value) + return tensor_shape.TensorShape(new_dims) + except (TypeError, ValueError): + return noise_shape + + return noise_shape def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: disable=invalid-name """Computes dropout. @@ -2193,30 +2215,7 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: di if tensor_util.constant_value(keep_prob) == 1: return x - def noise_shape_func(x, noise_shape): - # If noise_shape is none return immediately. - if noise_shape is None: - return array_ops.shape(x) - - try: - # Best effort to figure out the intended shape. - # If not possible, let the op to handle it. - noise_shape_ = tensor_shape.as_shape(noise_shape) - if (x.shape.dims is not None and - len(x.shape.dims) == len(noise_shape_.dims)): - new_dims = [] - for i, dim in enumerate(x.shape.dims): - if noise_shape_.dims[i].value is None and dim.value is not None: - new_dims.append(dim.value) - else: - new_dims.append(noise_shape_.dims[i].value) - return tensor_shape.TensorShape(new_dims) - except Exception: - return noise_shape - - return noise_shape - - noise_shape = noise_shape_func(x, noise_shape) + noise_shape = _get_noise_shape(x, noise_shape) # uniform [keep_prob, 1.0 + keep_prob) random_tensor = keep_prob -- GitLab From df7c1e534f53c9c9173d07947a85531f69efb081 Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Mon, 22 Jan 2018 21:31:47 -0800 Subject: [PATCH 021/629] add remove control deps transform --- tensorflow/tools/graph_transforms/BUILD | 1 + .../remove_control_dependencies.cc | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 tensorflow/tools/graph_transforms/remove_control_dependencies.cc diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index b5465b7fb3..de4821340e 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -102,6 +102,7 @@ cc_library( "remove_ema.cc", "obfuscate_names.cc", "remove_attribute.cc", + "remove_control_dependencies.cc", "remove_device.cc", "remove_nodes.cc", "rename_attribute.cc", diff --git a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc new file mode 100644 index 0000000000..a351e6812b --- /dev/null +++ b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc @@ -0,0 +1,34 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace tensorflow { +namespace graph_transforms { + +// Changes the op type of a specified op. +Status RemoveControlDependencies(const GraphDef& input_graph_def, + const TransformFuncContext& context, + GraphDef* output_graph_def) { + output_graph_def->Clear(); + for (const NodeDef& node : input_graph_def.node()) { + NodeDef* new_node = output_graph_def->mutable_node()->Add(); + *new_node = node; + new_node->clear_input(); + for (const auto& input : node.input()) { + if (input[0] != '^') { + new_node->add_input(input); + } + } + } + return Status::OK(); +} + +REGISTER_GRAPH_TRANSFORM("remove_control_dependencies", RemoveControlDependencies); + +} // namespace graph_transforms +} // namespace tensorflow -- GitLab From 6e505506524a10314c611b3f65127d847f69a1a0 Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Tue, 23 Jan 2018 12:22:12 -0800 Subject: [PATCH 022/629] Add better docs --- .../tools/graph_transforms/remove_control_dependencies.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc index a351e6812b..d4c369f148 100644 --- a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc +++ b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc @@ -10,7 +10,10 @@ namespace tensorflow { namespace graph_transforms { -// Changes the op type of a specified op. +// Remove control depdencies in preparation for inference. +// In the tensorflow graph, control dependencies are represented as extra +// inputs which are referenced with "^tensor_name". +// See node_def.proto for more details. Status RemoveControlDependencies(const GraphDef& input_graph_def, const TransformFuncContext& context, GraphDef* output_graph_def) { -- GitLab From c2fb1d9ca46f1a2cb24452c20655ee1600fe3e41 Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Tue, 23 Jan 2018 12:28:00 -0800 Subject: [PATCH 023/629] remove unnecessary includes --- .../tools/graph_transforms/remove_control_dependencies.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc index d4c369f148..901ddb8962 100644 --- a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc +++ b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc @@ -1,10 +1,5 @@ -#include #include #include -#include -#include -#include -#include #include namespace tensorflow { -- GitLab From 873f45e7150929e3427f2a504451de71d974686d Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Mon, 29 Jan 2018 15:46:11 -0800 Subject: [PATCH 024/629] add copyright and docs to readme --- tensorflow/tools/graph_transforms/README.md | 7 +++++++ .../remove_control_dependencies.cc | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/tensorflow/tools/graph_transforms/README.md b/tensorflow/tools/graph_transforms/README.md index 345d9eadb8..67badb4869 100644 --- a/tensorflow/tools/graph_transforms/README.md +++ b/tensorflow/tools/graph_transforms/README.md @@ -639,6 +639,13 @@ specified devices may not be available. In order to work with graphs like these, you can run this transform to wipe the slate clean and delete the device specifier from all ops. +### remove_control_dependencies + +Args: None \ +Prerequisites: None + +Removes all control dependencies from the graph. + ### remove_nodes Args: diff --git a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc index 901ddb8962..ba6df633be 100644 --- a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc +++ b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc @@ -1,3 +1,17 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ #include #include #include -- GitLab From 9a67c529db09fd37d200638a5b8a72176359d73b Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 29 Jan 2018 23:51:11 +0000 Subject: [PATCH 025/629] Call nn_ops._get_noise_shape from Dropout in layers Signed-off-by: Yong Tang --- tensorflow/python/layers/core.py | 9 ++------- tensorflow/python/ops/nn_ops.py | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py index e5b93a54f7..f6350b3d55 100644 --- a/tensorflow/python/layers/core.py +++ b/tensorflow/python/layers/core.py @@ -36,6 +36,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops from tensorflow.python.ops import standard_ops @@ -292,13 +293,7 @@ class Dropout(base.Layer): # shapes with dynamically sized inputs. if self.noise_shape is None: return self.noise_shape - - symbolic_shape = array_ops.shape(inputs) - noise_shape = [ - symbolic_shape[axis] if shape is None else shape - for axis, shape in enumerate(self.noise_shape) - ] - return noise_shape + return nn_ops._get_noise_shape(inputs, self.noise_shape) def call(self, inputs, training=False): diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index acbc241859..497d901155 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2150,19 +2150,21 @@ def _get_noise_shape(x, noise_shape): try: # Best effort to figure out the intended shape. # If not possible, let the op to handle it. + # In eager mode exception will show up. noise_shape_ = tensor_shape.as_shape(noise_shape) - if (x.shape.dims is not None and - len(x.shape.dims) == len(noise_shape_.dims)): - new_dims = [] - for i, dim in enumerate(x.shape.dims): - if noise_shape_.dims[i].value is None and dim.value is not None: - new_dims.append(dim.value) - else: - new_dims.append(noise_shape_.dims[i].value) - return tensor_shape.TensorShape(new_dims) except (TypeError, ValueError): return noise_shape + if (x.shape.dims is not None and + len(x.shape.dims) == len(noise_shape_.dims)): + new_dims = [] + for i, dim in enumerate(x.shape.dims): + if noise_shape_.dims[i].value is None and dim.value is not None: + new_dims.append(dim.value) + else: + new_dims.append(noise_shape_.dims[i].value) + return tensor_shape.TensorShape(new_dims) + return noise_shape def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: disable=invalid-name -- GitLab From 714b53281d39dd9e172c67cc567264b2f8d47f76 Mon Sep 17 00:00:00 2001 From: Peng Yu Date: Mon, 29 Jan 2018 21:51:17 -0500 Subject: [PATCH 026/629] add visiablity to public proto --- tensorflow/contrib/tensor_forest/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 58a7fa095d..6f181288d5 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -498,6 +498,7 @@ py_library( "//tensorflow/contrib/decision_trees/proto:generic_tree_model_py", "//tensorflow/contrib/framework:framework_py", "//tensorflow/contrib/tensor_forest/proto:tensor_forest_params_proto_py", + "//tensorflow/contrib/tensor_forest/proto:fertile_stats_proto_py", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_for_generated_wrappers", -- GitLab From 903688ccc0672e2ff4bef2ee29e59d146ea9a495 Mon Sep 17 00:00:00 2001 From: Boris Pfahringer Date: Mon, 29 Jan 2018 15:41:49 +0000 Subject: [PATCH 027/629] Accept directory path in check_eventfile_for_keyword() helper.' So that we can look in the evaluation directory if we want to test evaluation summary writing. --- tensorflow/python/estimator/estimator_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 39a5b998eb..b3057afa29 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -80,13 +80,13 @@ def dummy_model_fn(features, labels, params): _, _, _ = features, labels, params -def check_eventfile_for_keyword(keyword, est): +def check_eventfile_for_keyword(keyword, dir_): """Checks event files for the keyword.""" writer_cache.FileWriterCache.clear() # Get last Event written. - event_paths = glob.glob(os.path.join(est.model_dir, 'events*')) + event_paths = glob.glob(os.path.join(dir_, 'events*')) last_event = None for last_event in summary_iterator.summary_iterator(event_paths[-1]): if last_event.summary is not None: @@ -610,7 +610,7 @@ class EstimatorTrainTest(test.TestCase): # Make sure nothing is stuck in limbo. writer_cache.FileWriterCache.clear() - if check_eventfile_for_keyword('loss', est): + if check_eventfile_for_keyword('loss', est.model_dir): return self.fail('{} should be part of reported summaries.'.format('loss')) @@ -1291,7 +1291,7 @@ class EstimatorEvaluateTest(test.TestCase): writer_cache.FileWriterCache.clear() # Get last Event written. - if check_eventfile_for_keyword('image', est): + if check_eventfile_for_keyword('image', est.model_dir): return self.fail('{} should be part of reported summaries.'.format('image')) -- GitLab From 1230811c773756fdb2dff830da432c1dff4675e1 Mon Sep 17 00:00:00 2001 From: Boris Pfahringer Date: Wed, 31 Jan 2018 19:44:52 +0100 Subject: [PATCH 028/629] Look at all summary values when looking for a tag. --- tensorflow/python/estimator/estimator_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index b3057afa29..65c9bd1d11 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -90,8 +90,8 @@ def check_eventfile_for_keyword(keyword, dir_): last_event = None for last_event in summary_iterator.summary_iterator(event_paths[-1]): if last_event.summary is not None: - if last_event.summary.value: - if keyword in last_event.summary.value[0].tag: + for value in last_event.summary.value: + if keyword in value.tag: return True return False -- GitLab From 92d622d27fbb2fe1f2e29a17a606c166fcc12f35 Mon Sep 17 00:00:00 2001 From: Boris Pfahringer Date: Mon, 29 Jan 2018 15:45:44 +0000 Subject: [PATCH 029/629] Check the evaluation events in the evaluation tests. --- tensorflow/python/estimator/estimator_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 65c9bd1d11..1af331697e 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -1290,8 +1290,8 @@ class EstimatorEvaluateTest(test.TestCase): # Make sure nothing is stuck in limbo. writer_cache.FileWriterCache.clear() - # Get last Event written. - if check_eventfile_for_keyword('image', est.model_dir): + # Get last evaluation Event written. + if check_eventfile_for_keyword('image', os.path.join(est.model_dir, 'eval')): return self.fail('{} should be part of reported summaries.'.format('image')) -- GitLab From 0b012956d2c86583c189121a3b292de3c5e0cc6c Mon Sep 17 00:00:00 2001 From: Boris Pfahringer Date: Mon, 29 Jan 2018 15:51:15 +0000 Subject: [PATCH 030/629] Fix writing binary summaries in python3. In python 3, tf.string tensors are bytes() objects when fetched, not str(). six.binary_type maps exactly to tf.string in python 2 and 3, so use it to determine if we should try to interpret a metric as a tf.Summary proto. --- tensorflow/python/estimator/estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 78d74b63d3..f938ee0110 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -1103,7 +1103,7 @@ def _write_dict_to_summary(output_dir, isinstance(dictionary[key], np.int32) or isinstance(dictionary[key], int)): summary_proto.value.add(tag=key, simple_value=int(dictionary[key])) - elif isinstance(dictionary[key], six.string_types): + elif isinstance(dictionary[key], six.binary_type): try: summ = summary_pb2.Summary.FromString(dictionary[key]) for i, _ in enumerate(summ.value): -- GitLab From 3e932b668daf1534679811c58b83478873bb9573 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 1 Feb 2018 09:56:43 -0800 Subject: [PATCH 031/629] Adding Release notes for r1.6. --- RELEASE.md | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 5 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index af6440acef..728a840002 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,11 +1,43 @@ -# Release 1.5.0 +# Release 1.6.0 ## Breaking Changes * Prebuilt binaries are now built against CUDA 9.0 and cuDNN 7. -* Our Linux binaries are built using ubuntu 16 containers, potentially - introducing glibc incompatibility issues with ubuntu 14. -* Starting from 1.6 release, our prebuilt binaries will use AVX instructions. - This may break TF on older CPUs. +* Prebuilt binaries will use AVX instructions. This may break TF on older CPUs. + +## Major Features And Improvements +* New Optimizer internal API for non-slot variables. Descendants of AdamOptimizer that access _beta[12]_power will need to be updated. +* `tf.estimator.{FinalExporter,LatestExporter}` now export stripped SavedModels. This improves forward compatibility of the SavedModel. +* FFT support added to XLA CPU/GPU. + +## Bug Fixes and Other Changes +* Documentation updates: + * Added a second version of Getting Started, which is aimed at ML +newcomers. + * Clarified documentation on `resize_images.align_corners` parameter. + * Additional documentation for TPUs. +* Google Cloud Storage (GCS): + * Add client-side throttle. + * Add a `FlushCaches()` method to the FileSystem interface, with an implementation for GcsFileSystem. +* Other: + * Add `tf.contrib.distributions.Kumaraswamy`. + * `RetryingFileSystem::FlushCaches()` calls the base FileSystem's `FlushCaches()`. + * Add auto_correlation to distributions. + * Add `tf.contrib.distributions.Autoregressive`. + * Add SeparableConv1D layer. + * Add convolutional Flipout layers. + * When both inputs of `tf.matmul` are bfloat16, it returns bfloat16, instead of float32. + * Added `tf.contrib.image.connected_components`. + * Add `tf.contrib.framework.CriticalSection` that allows atomic variable access. + * Output variance over trees predictions for classifications tasks. + * For `pt` and `eval` commands, allow writing tensor values to filesystem as numpy files. + * gRPC: Propagate truncated errors (instead of returning gRPC internal error). + * Augment parallel_interleave to support 2 kinds of prefetching. + * Improved XLA support for C64-related ops log, pow, atan2, tanh. + * Add probabilistic convolutional layers. + +## API Changes +* Introducing prepare_variance boolean with default setting to False for backward compatibility. +* Move `layers_dense_variational_impl.py` to `layers_dense_variational.py`. ## Known Bugs * Using XLA:GPU with CUDA 9 and CUDA 9.1 results in garbage results and/or @@ -28,6 +60,42 @@ TensorFlow will print a warning if you use XLA:GPU with a known-bad version of CUDA; see e00ba24c4038e7644da417ddc639169b6ea59122. +## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +4d55397500, Ag Ramesh, Aiden Scandella, Akimasa Kimura, Alex Rothberg, Allen Goodman, +amilioto, Andrei Costinescu, Andrei Nigmatulin, Anjum Sayed, Anthony Platanios, +Anush Elangovan, Armando Fandango, Ashish Kumar Ram, Ashwini Shukla, Ben, Bhavani Subramanian, +Brett Koonce, Carl Thomé, cclauss, Cesc, Changming Sun, Christoph Boeddeker, Clayne Robison, +Clemens Schulz, Clint (Woonhyuk Baek), codrut3, Cole Gerdemann, Colin Raffel, Daniel Trebbien, +Daniel Ylitalo, Daniel Zhang, Daniyar, Darjan Salaj, Dave Maclachlan, David Norman, Dong--Jian, +dongsamb, dssgsra, Edward H, eladweiss, elilienstein, Eric Lilienstein, error.d, Eunji Jeong, fanlu, +Florian Courtial, fo40225, Fred, Gregg Helt, Guozhong Zhuang, Hanchen Li, hsm207, hyunyoung2, +ImSheridan, Ishant Mrinal Haloi, Jacky Ko, Jay Young, Jean Flaherty, Jerome, JerrikEph, Jesse +Kinkead, jfaath, Jian Lin, jinghuangintel, Jiongyan Zhang, Joel Hestness, Joel Shor, Johnny Chan, +Julian Niedermeier, Julian Wolff, JxKing, K-W-W, Karl Lessard, Kasper Marstal, Keiji Ariyama, +Koan-Sin Tan, Loki Der Quaeler, Loo Rong Jie, Luke Schaefer, Lynn Jackson, ManHyuk, Matt Basta, +Matt Smith, Matthew Schulkind, Michael, michaelkhan3, Miguel Piedrafita, Mikalai Drabovich, +Mike Knapp, mjwen, mktozk, Mohamed Aly, Mohammad Ashraf Bhuiyan, Myungjoo Ham, Naman Bhalla, +Namrata-Ibm, Nathan Luehr, nathansilberman, Netzeband, Niranjan Hasabnis, Omar Aflak, Ozge +Yalcinkaya, Parth P Panchal, patrickzzy, Patryk Chrabaszcz, Paul Van Eck, Paweł Kapica, Peng Yu, +Philip Yang, Pierre Blondeau, Po-Hsien Chu, powderluv, Puyu Wang, Rajendra Arora, Rasmus, Renat +Idrisov, resec, Robin Richtsfeld, Ronald Eddy Jr, Sahil Singh, Sam Matzek, Sami Kama, sandipmgiri, +Santiago Castro, Sayed Hadi Hashemi, Scott Tseng, Sergii Khomenko, Shahid, Shengpeng Liu, Shreyash +Sharma, Shrinidhi Kl, Simone Cirillo, simsicon, Stanislav Levental, starsblinking, Stephen Lumenta, +Steven Hickson, Su Tang, Taehoon Lee, Takuya Wakisaka, Ted Chang, Ted Ying, Tijmen Verhulsdonck, +Timofey Kondrashov, vade, vaibhav, Valentin Khrulkov, vchigrin, Victor Costan, Viraj Navkal, +Vivek Rane, wagonhelm, Yan Facai (颜发才), Yanbo Liang, Yaroslav Bulatov, yegord, Yong Tang, +Yoni Tsafir, yordun, Yuan (Terry) Tang, Yuxin Wu, zhengdi, Zhengsheng Wei, 田传武 + +# Release 1.5.0 + +## Breaking Changes +* Prebuilt binaries are now built against CUDA 9.0 and cuDNN 7. +* Starting from 1.6 release, our prebuilt binaries will use AVX instructions. + This may break TF on older CPUs. + ## Major Features And Improvements * [Eager execution](https://github.com/tensorflow/tensorflow/tree/r1.5/tensorflow/contrib/eager) preview version is now available. -- GitLab From 68c52b926eb8cba2b43a37cde4a9658654427e70 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 1 Feb 2018 10:27:29 -0800 Subject: [PATCH 032/629] Adding the known bugs to 1.5 as well. --- RELEASE.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 728a840002..0fad3b5d41 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -214,6 +214,27 @@ Yoni Tsafir, yordun, Yuan (Terry) Tang, Yuxin Wu, zhengdi, Zhengsheng Wei, 田 * Minor refactor: move stats files from `stochastic` to `common` and remove `stochastic`. +## Known Bugs +* Using XLA:GPU with CUDA 9 and CUDA 9.1 results in garbage results and/or + `CUDA_ILLEGAL_ADDRESS` failures. + + Google discovered in mid-December 2017 that the PTX-to-SASS compiler in CUDA 9 + and CUDA 9.1 sometimes does not properly compute the carry bit when + decomposing 64-bit address calculations with large offsets (e.g. `load [x + + large_constant]`) into 32-bit arithmetic in SASS. + + As a result, these versions of `ptxas` miscompile most XLA programs which use + more than 4GB of temp memory. This results in garbage results and/or + `CUDA_ERROR_ILLEGAL_ADDRESS` failures. + + A fix in CUDA 9.1.121 is expected in late February 2018. We do not expect a + fix for CUDA 9.0.x. Until the fix is available, the only workaround is to + [downgrade](https://developer.nvidia.com/cuda-toolkit-archive) to CUDA 8.0.x + or disable XLA:GPU. + + TensorFlow will print a warning if you use XLA:GPU with a known-bad version of + CUDA; see e00ba24c4038e7644da417ddc639169b6ea59122. + ## Thanks to our Contributors This release contains contributions from many people at Google, as well as: -- GitLab From c63fb0841bc020ddee2a4eba337eca1cc49c2eff Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 11:14:18 -0800 Subject: [PATCH 033/629] Update version to 1.6.0-rc0. (#16701) * Updating the version to 1.6.0-rc0. * Removing the escape character. * Updating the cloud tpu version. --- .../eager/python/examples/mnist/mnist.py | 2 +- .../contrib/tpu/profiler/pip_package/setup.py | 2 +- tensorflow/core/public/version.h | 4 ++-- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 22 +++++++++---------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 10 ++++++--- tensorflow/tools/docker/Dockerfile.devel | 2 +- .../tools/docker/Dockerfile.devel-cpu-mkl | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 13 files changed, 44 insertions(+), 40 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/mnist/mnist.py b/tensorflow/contrib/eager/python/examples/mnist/mnist.py index ed7dbc8904..772f59562b 100644 --- a/tensorflow/contrib/eager/python/examples/mnist/mnist.py +++ b/tensorflow/contrib/eager/python/examples/mnist/mnist.py @@ -39,7 +39,7 @@ class MNISTModel(tfe.Network): """MNIST Network. Network structure is equivalent to: - https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/examples/tutorials/mnist/mnist_deep.py + https://github.com/tensorflow/tensorflow/blob/r1.6/tensorflow/examples/tutorials/mnist/mnist_deep.py and https://github.com/tensorflow/models/blob/master/tutorials/image/mnist/convolutional.py diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py index 3dffebe668..cb61984799 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py @@ -20,7 +20,7 @@ from __future__ import print_function from setuptools import setup -_VERSION = '1.5.0-rc1' +_VERSION = '1.6.0-rc0' CONSOLE_SCRIPTS = [ 'capture_tpu_profile=cloud_tpu_profiler.main:run_main', diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index b02f899b87..50bfa91267 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,12 +19,12 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 1 -#define TF_MINOR_VERSION 5 +#define TF_MINOR_VERSION 6 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc0" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 14add7c77e..a783205b4a 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.5.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index d2af9d9843..5249e04615 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.5.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index e5388c4b1e..0c6c773e62 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.5.0 + 1.6.0-rc0 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.5.0 + 1.6.0-rc0 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.5.0 + 1.6.0-rc0 org.tensorflow libtensorflow_jni_gpu - 1.5.0 + 1.6.0-rc0 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.5.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.5.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc0.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.5.0.jar HelloTF.java
+
javac -cp libtensorflow-1.6.0-rc0.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.5.0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.6.0-rc0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.5.0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.6.0-rc0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index cd8c14599f..105b225177 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -293,7 +293,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl @@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp35-cp35m-linux_x86_64.whl
 
@@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index f49d3a2f08..a6ea548cfb 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -115,7 +115,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -238,7 +238,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -347,7 +347,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py2-none-any.whl @@ -520,7 +520,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py2-none-any.whl
 
@@ -528,5 +528,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py2-none-any.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index bc7d2080dc..36dffd85dc 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.5.0 on Linux: +for TensorFlow 1.6.0rc0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.5.0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc0-py2-none-any.whl
 
## Validate your installation @@ -460,7 +460,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** - + + @@ -478,6 +479,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0rc0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
tensorflow_gpu-1.5.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.079
tensorflow-1.4.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.4N/AN/A
+ @@ -491,6 +493,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
tensorflow-1.3.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
+ + diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 5dc4a053fd..d16761c367 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -70,7 +70,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.5 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . # TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index 96b260ad3a..3690e7dfe5 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.5 +ARG TF_BRANCH=r1.6 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 07ffd3839a..4ef37881bc 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -79,7 +79,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.5 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index d7fab2b93a..2002786999 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.5.0' +_VERSION = '1.6.0-rc0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From a0e31f28df22030b4ce33db49e999f56622aa693 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Fri, 2 Feb 2018 11:02:34 -0800 Subject: [PATCH 034/629] Remove all_files rule from com_google_absl.BUILD --- third_party/com_google_absl.BUILD | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/third_party/com_google_absl.BUILD b/third_party/com_google_absl.BUILD index 0c8d327c1f..8fca145f75 100644 --- a/third_party/com_google_absl.BUILD +++ b/third_party/com_google_absl.BUILD @@ -3,15 +3,3 @@ package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache exports_files(["LICENSE"]) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) -- GitLab From 0fd0f2ded169c383a564ffbe9564b6c3d5a684f3 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Fri, 2 Feb 2018 12:42:04 -0800 Subject: [PATCH 035/629] Fix sanity --- tensorflow/tools/ci_build/ci_sanity.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index b3a8ff2ac7..fd5d005844 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -351,7 +351,7 @@ do_external_licenses_check(){ # Whitelist echo ${EXTRA_LICENSE_FILE} - grep -e "@bazel_tools//src/" -e "@bazel_tools//tools/" -e "@com_google_absl//" -e "//external" -e "@local" -v ${EXTRA_LICENSES_FILE} > temp.txt + grep -e "@bazel_tools//src" -e "@bazel_tools//tools/" -e "@com_google_absl//" -e "//external" -e "@local" -v ${EXTRA_LICENSES_FILE} > temp.txt mv temp.txt ${EXTRA_LICENSES_FILE} -- GitLab From 31bfdac6fa5cdf25ebcbf297f54bcaa042f1ec14 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 13:00:44 -0800 Subject: [PATCH 036/629] Revert "Update external protobuf codebase version for Windows cmake build" This reverts commit 07bec47ba5db4c2f2e33ecb49f23253a371bfbbe. --- tensorflow/contrib/cmake/external/protobuf.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index fd05fa6d47..aedb793d2a 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG 396336eb961b75f03b25824fe86cf6490fb75e3a) +set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9) if(WIN32) set(protobuf_STATIC_LIBRARIES -- GitLab From e8341fe47417fece104f24fc7067c105c507aa95 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 13:03:49 -0800 Subject: [PATCH 037/629] Fixing the typo. --- tensorflow/core/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index a8a8c34846..dba9f6f0e0 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1909,7 +1909,7 @@ cc_library( tf_cuda_library( name = "cuda_device_functions", hdrs = ["util/cuda_device_functions.h"], - cuda_deps = ["//third_party_gpus/cuda:cuda_headers"], + cuda_deps = ["//third_party/gpus/cuda:cuda_headers"], visibility = ["//visibility:public"], deps = [":framework_lite"], ) -- GitLab From b300c52026ba152e62fc6d7e7e8c2cc515677212 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 13:46:48 -0800 Subject: [PATCH 038/629] Fixing the path post transition to GitHub. --- tensorflow/core/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index dba9f6f0e0..5b38c10032 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1909,7 +1909,7 @@ cc_library( tf_cuda_library( name = "cuda_device_functions", hdrs = ["util/cuda_device_functions.h"], - cuda_deps = ["//third_party/gpus/cuda:cuda_headers"], + cuda_deps = ["@local_config_cuda//cuda:cuda_headers"], visibility = ["//visibility:public"], deps = [":framework_lite"], ) -- GitLab From f42bf6706d12369c7686bae1ecac8f6957daa78a Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 13:52:30 -0800 Subject: [PATCH 039/629] Revert "Update deprecated API use" This reverts commit ccedcbe14c798fb3b227030cf85b4fe89406f0d8. --- tensorflow/core/distributed_runtime/tensor_coding.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/distributed_runtime/tensor_coding.cc b/tensorflow/core/distributed_runtime/tensor_coding.cc index 34a4013547..fe2d1a1293 100644 --- a/tensorflow/core/distributed_runtime/tensor_coding.cc +++ b/tensorflow/core/distributed_runtime/tensor_coding.cc @@ -81,7 +81,7 @@ void TensorResponse::InitPartial(const RecvTensorResponse& response) { Status TensorResponse::ParseFrom(Source* source) { if (!on_host_) { protobuf::io::CodedInputStream input(source->contents()); - input.SetTotalBytesLimit(INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited // Pre-parse into local storage, then delegate to device. if (!meta_.ParseFromCodedStream(&input) || !input.ConsumedEntireMessage()) { @@ -217,7 +217,7 @@ bool TensorResponse::ParseTensorSubmessage( bool TensorResponse::ParseFast(Source* source) { protobuf::io::CodedInputStream input(source->contents()); - input.SetTotalBytesLimit(INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited while (true) { auto p = input.ReadTagWithCutoff(127); int tag = GetTagFieldNumber(p.first); -- GitLab From 782ccd93198f0187e4cb52aad489b49c6960d074 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 2 Feb 2018 13:57:11 -0800 Subject: [PATCH 040/629] Add k8 to detection for when to use neon_tensor_utils. --- tensorflow/contrib/lite/kernels/internal/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index 38b032c6de..2c29b2abb8 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -330,6 +330,9 @@ cc_library( ":x86": [ ":neon_tensor_utils", ], + ":k8": [ + ":neon_tensor_utils", + ], ":darwin": [ ":neon_tensor_utils", ], -- GitLab From 075482c55bd8fa87eeee0f430ddf38d081cf16ec Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 14:08:30 -0800 Subject: [PATCH 041/629] Update BUILD --- tensorflow/core/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 5b38c10032..45edbe9652 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1909,7 +1909,6 @@ cc_library( tf_cuda_library( name = "cuda_device_functions", hdrs = ["util/cuda_device_functions.h"], - cuda_deps = ["@local_config_cuda//cuda:cuda_headers"], visibility = ["//visibility:public"], deps = [":framework_lite"], ) -- GitLab From a8efd478702bbd4503f8afa9559b86a54d8544eb Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Fri, 2 Feb 2018 15:00:17 -0800 Subject: [PATCH 042/629] Adds batch inference support on TPU with TPUEstimator.predict. PiperOrigin-RevId: 184339842 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 627 +++++++++++++++--- 1 file changed, 541 insertions(+), 86 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index c7008533f3..b5082fc823 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -47,6 +47,7 @@ from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator import util from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -170,10 +171,12 @@ class _TPUContext(object): ``` """ - def __init__(self, config, train_batch_size, eval_batch_size, use_tpu): + def __init__(self, config, train_batch_size, eval_batch_size, + predict_batch_size, use_tpu): self._config = config self._train_batch_size = train_batch_size self._eval_batch_size = eval_batch_size + self._predict_batch_size = predict_batch_size self._use_tpu = use_tpu self._num_shards_or_none = self._config.tpu_config.num_shards self._mode = None @@ -218,31 +221,66 @@ class _TPUContext(object): return (self._mode == model_fn_lib.ModeKeys.TRAIN and not self._config.tpu_config.per_host_input_for_training) - def is_running_on_cpu(self): - """Determines whether the input_fn and model_fn should be invoked on CPU.""" + def is_running_on_cpu(self, is_export_mode=False): + """Determines whether the input_fn and model_fn should be invoked on CPU. + + Args: + is_export_mode: Indicates whether the current mode is for exporting the + model, when mode == PREDICT. Only with this bool, we could + tell whether user is calling the Estimator.predict or + Estimator.export_savedmodel, which are running on TPU and CPU + respectively. Parent class Estimator does not distingush these two. + + Returns: + bool, whether current input_fn or model_fn should be running on CPU. + + Raises: + ValueError: any configuration is invalid. + """ mode = self._assert_mode() - return (not self._use_tpu) or mode == model_fn_lib.ModeKeys.PREDICT + + if not self._use_tpu: + return True + + if mode != model_fn_lib.ModeKeys.PREDICT: + return False + + # There are actually 2 use cases when running with mode.PREDICT: prediction + # and saving the model. We run actual predictions on the TPU, but + # model export is run on the CPU. + if is_export_mode: + return True + + if self._predict_batch_size is None: + raise ValueError( + 'predict_batch_size in TPUEstimator constructor should not be ' + '`None` if .predict is running on TPU.') + if self.num_hosts > 1: + raise ValueError( + 'TPUEstimator.predict should be running on single host.') + + return False @property def global_batch_size(self): mode = self._assert_mode() - return (self._train_batch_size - if mode == model_fn_lib.ModeKeys.TRAIN else self._eval_batch_size) + if mode == model_fn_lib.ModeKeys.TRAIN: + return self._train_batch_size + elif mode == model_fn_lib.ModeKeys.EVAL: + return self._eval_batch_size + elif mode == model_fn_lib.ModeKeys.PREDICT: + return self._predict_batch_size + else: + return None @property def batch_size_for_input_fn(self): """Returns the shard batch size for `input_fn`.""" - mode = self._assert_mode() + global_batch_size = self.global_batch_size + if self.is_running_on_cpu(): - if mode == model_fn_lib.ModeKeys.TRAIN: - return self._train_batch_size - if mode == model_fn_lib.ModeKeys.EVAL: - return self._eval_batch_size - return None + return global_batch_size - global_batch_size = ( - self._train_batch_size - if mode == model_fn_lib.ModeKeys.TRAIN else self._eval_batch_size) # On TPU if self.is_input_sharded_per_core(): return global_batch_size // self.num_cores @@ -252,19 +290,13 @@ class _TPUContext(object): @property def batch_size_for_model_fn(self): """Returns the shard batch size for `model_fn`.""" - mode = self._assert_mode() + global_batch_size = self.global_batch_size + if self.is_running_on_cpu(): - if mode == model_fn_lib.ModeKeys.TRAIN: - return self._train_batch_size - if mode == model_fn_lib.ModeKeys.EVAL: - return self._eval_batch_size - return None + return global_batch_size # On TPU. always sharded per core. - if mode == model_fn_lib.ModeKeys.TRAIN: - return self._train_batch_size // self.num_cores - else: - return self._eval_batch_size // self.num_cores + return global_batch_size // self.num_cores @property def master_job(self): @@ -506,6 +538,22 @@ class _OpQueueContext(object): self._thread.join() +class _OpSignalOnceQueueContext(_OpQueueContext): + """Manages work queue and thread for a infeed/outfeed thread. + + This subclass only signals once. + """ + + def __init__(self, name, target, args): + super(_OpSignalOnceQueueContext, self).__init__(name, target, args) + self._has_signaled = False + + def send_next_batch_signal(self, iterations): + if not self._has_signaled: + self._queue.put(iterations) + self._has_signaled = True + + class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): """A Session hook setting up the TPU initialization, infeed, and outfeed. @@ -633,13 +681,16 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): except Exception as e: # pylint: disable=broad-except self._log_error(session, e) + def _create_infeed_controller(self, name, target, args): + return _OpQueueContext(name=name, target=target, args=args) + def after_create_session(self, session, coord): logging.info('Init TPU system') session.run(self._init_ops, options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000)) logging.info('Start infeed thread controller') - self._infeed_controller = _OpQueueContext( + self._infeed_controller = self._create_infeed_controller( name='InfeedController', target=self._run_infeed, args=(session,)) logging.info('Start outfeed thread controller') @@ -677,6 +728,16 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): session.run(self._finalize_ops) +class TPUInfeedOutfeedSessionHookForPrediction(TPUInfeedOutfeedSessionHook): + + def __init__(self, ctx, enqueue_ops, dequeue_ops): + super(TPUInfeedOutfeedSessionHookForPrediction, self).__init__( + ctx, enqueue_ops, dequeue_ops, run_infeed_loop_on_coordinator=False) + + def _create_infeed_controller(self, name, target, args): + return _OpSignalOnceQueueContext(name=name, target=target, args=args) + + class _TPUStopAtStepHook(session_run_hook.SessionRunHook): """Hook that requests stop at a specified step. @@ -764,6 +825,47 @@ class _SetEvalIterationsHook(session_run_hook.SessionRunHook): self._iterations_per_loop_var.load(self._num_steps, session=session) +class _StoppingPredictHook(session_run_hook.SessionRunHook): + """Hook that requests stop according to the stopping signal in prediction.""" + + def __init__(self, scalar_stopping_signal): + self._scalar_stopping_signal = scalar_stopping_signal + + def begin(self): + self._iterations_per_loop_var = _create_or_get_iterations_per_loop() + + def after_create_session(self, session, coord): + # This is not necessary as we do not run infeed enqueue and outfeed dequeue + # in side threads for prediction model. But it makes the + # TPUInfeedOutfeedSessionHook prints nice message. + self._iterations_per_loop_var.load(1, session=session) + + def before_run(self, run_context): + return session_run_hook.SessionRunArgs(self._scalar_stopping_signal) + + def after_run(self, run_context, run_values): + _ = run_context + scalar_stopping_signal = run_values.results + if _StopSignals.should_stop(scalar_stopping_signal): + # NOTE(xiejw): In prediction, stopping signals are inserted for each + # batch. And we append one more batch to signal the system it should stop. + # The data flow might look like + # + # batch 0: images, labels, stop = 0 (user provideded) + # batch 1: images, labels, stop = 0 (user provideded) + # ... + # batch 99: images, labels, stop = 0 (user provideded) + # batch 100: images, labels, stop = 1 (TPUEstimator appended) + # + # where the final batch (id = 100) is appended by TPUEstimator, so we + # should drop it before returning the predictions to user. + # To achieve that, we throw the OutOfRangeError in after_run. Once + # Monitored Session sees this error in SessionRunHook.after_run, the + # "current" prediciton, i.e., batch with id=100, will be discarded + # immediately + raise errors.OutOfRangeError(None, None, 'Stopped by stopping signal.') + + def generate_per_core_enqueue_ops_fn_for_host(ctx, input_fn, inputs_structure_recorder): """Generates infeed enqueue ops for per-core input_fn on a single host.""" @@ -815,6 +917,14 @@ def generate_per_host_enqueue_ops_fn_for_host( inputs = _Inputs.from_input_fn(input_fn()) is_dataset = inputs.is_dataset + if ctx.mode == model_fn_lib.ModeKeys.PREDICT: + if not is_dataset: + raise TypeError( + 'For mode PREDICT, `input_fn` must return `Dataset` instead of ' + '`features` and `labels`.') + inputs = _InputsWithStoppingSignals( + dataset=inputs.dataset, batch_size=ctx.batch_size_for_input_fn) + if is_dataset: hooks.append(inputs.dataset_initializer_hook()) @@ -825,11 +935,13 @@ def generate_per_host_enqueue_ops_fn_for_host( # dataset, it is initialized and the features and labels extracted via # `dataset.iterator.get_next()` features, labels = inputs.features_and_labels() + signals = inputs.signals() - inputs_structure_recorder.validate_and_record_structure(features, labels) + inputs_structure_recorder.validate_and_record_structure( + features, labels, signals) unsharded_tensor_list = ( inputs_structure_recorder.flatten_features_and_labels( - features, labels)) + features, labels, signals)) infeed_queue = tpu_feed.InfeedQueue( tuple_types=[t.dtype for t in unsharded_tensor_list], @@ -841,7 +953,13 @@ def generate_per_host_enqueue_ops_fn_for_host( per_host_enqueue_ops = ( infeed_queue.split_inputs_and_generate_enqueue_ops( unsharded_tensor_list, placement_function=lambda x: device)) - return per_host_enqueue_ops + if signals is None: + return per_host_enqueue_ops + else: + return { + 'ops': per_host_enqueue_ops, + 'signals': signals, + } return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset @@ -883,6 +1001,7 @@ class _InputPipeline(object): self._feature_names = [] self._label_names = [] self._has_labels = False + self._signals_helper = None # Internal state. self._initialized = False @@ -890,7 +1009,7 @@ class _InputPipeline(object): def has_labels(self): return self._has_labels - def validate_and_record_structure(self, features, labels): + def validate_and_record_structure(self, features, labels, signals=None): """Validates and records the structure of features` and `labels`.""" def _extract_key_names(tensor_or_dict): @@ -903,6 +1022,10 @@ class _InputPipeline(object): feature_names = _extract_key_names(features) label_names = _extract_key_names(labels) + if signals is not None and self._signals_helper is None: + # Record signals helper. + self._signals_helper = _SignalsHelper(signals) + if self._initialized: # Verify the structure is same. The following should never happen. assert feature_names == self._feature_names, 'feature keys mismatched' @@ -915,7 +1038,7 @@ class _InputPipeline(object): self._label_names = label_names self._has_labels = has_labels - def flatten_features_and_labels(self, features, labels): + def flatten_features_and_labels(self, features, labels, signals=None): """Flattens the `features` and `labels` to a single tensor list.""" flattened_inputs = [] if self._feature_names: @@ -931,6 +1054,9 @@ class _InputPipeline(object): flattened_inputs.extend([labels[name] for name in self._label_names]) else: flattened_inputs.append(labels) + + if signals is not None: + flattened_inputs.extend(_SignalsHelper.as_tensor_list(signals)) return flattened_inputs def unflatten_features_and_labels(self, flattened_inputs): @@ -956,7 +1082,11 @@ class _InputPipeline(object): else: expected_num_labels = 0 - expected_num_tensors = expected_num_features + expected_num_labels + expected_num_signals = ( + self._signals_helper.num_signals if self._signals_helper else 0) + + expected_num_tensors = ( + expected_num_features + expected_num_labels + expected_num_signals) if expected_num_tensors != len(flattened_inputs): raise ValueError( @@ -973,13 +1103,20 @@ class _InputPipeline(object): if expected_num_labels == 0: unflattened_label = None elif self._label_names: - unflattened_label = dict( - zip(self._label_names, flattened_inputs[expected_num_features:])) + label_list = flattened_inputs[ + expected_num_features:expected_num_features + expected_num_labels] + unflattened_label = dict(zip(self._label_names, label_list)) else: # Single tensor case. unflattened_label = flattened_inputs[expected_num_features] - return _Inputs(unflattened_features, unflattened_label) + signals = None + if expected_num_signals != 0: + tensor_list_for_signals = flattened_inputs[ + expected_num_features + expected_num_labels:] + signals = self._signals_helper.unflatten(tensor_list_for_signals) + + return _Inputs(unflattened_features, unflattened_label, signals=signals) def __init__(self, input_fn, batch_axis, ctx): """Constructor. @@ -1078,9 +1215,12 @@ class _InputPipeline(object): # slow compared to the previous case. if is_dataset: run_infeed_loop_on_coordinator = False + wrap_fn = ( + _wrap_computation_in_while_loop + if self._ctx.mode != model_fn_lib.ModeKeys.PREDICT else + _wrap_computation_in_while_loop_with_stopping_signals) enqueue_ops.append( - _wrap_computation_in_while_loop( - device=host_device, op_fn=enqueue_ops_fn)) + wrap_fn(device=host_device, op_fn=enqueue_ops_fn)) else: enqueue_ops.append(enqueue_ops_fn()) infeed_queues.append(captured_infeed_queue.get()) @@ -1145,7 +1285,8 @@ class _ModelFnWrapper(object): infeed dequeue channel. Returns: - A Fn representing the train step for TPU. + A tuple of train_fn, host_calls, and captured scaffold_fn. The train_fn + representing the train step for TPU. """ host_call = _OutfeedHostCall(self._ctx) @@ -1198,8 +1339,8 @@ class _ModelFnWrapper(object): infeed dequeue channel. Returns: - A tuple of eval_fn and eval_metrics. The eval_fn representing the eval - step for TPU. and eval_metrics is an `_OutfeedHostCall` instance. + A tuple of eval_fn, host_calls, and captured scaffold_fn. The eval_fn + representing the eval step for TPU. """ host_calls = _OutfeedHostCall(self._ctx) captured_scaffold_fn = _CapturedObject() @@ -1228,7 +1369,55 @@ class _ModelFnWrapper(object): return eval_step, host_calls, captured_scaffold_fn - def _call_model_fn(self, features, labels): + def convert_to_single_tpu_predict_step(self, dequeue_fn): + """Converts user provided model_fn` as a single predict step on TPU. + + Args: + dequeue_fn: The function to retrieve inputs, features and labels, from TPU + infeed dequeue channel. + + Returns: + A tuple of predict_fn, host_calls, and captured scaffold_fn. The + predict_fn representing the predict step for TPU. + """ + host_calls = _OutfeedHostCall(self._ctx) + captured_scaffold_fn = _CapturedObject() + + def predict_step(unused_scalar_stopping_signal): + """Evaluation step function for use inside a while loop.""" + inputs = dequeue_fn() + features, labels = inputs.features_and_labels() + stopping_signals = inputs.signals() + + assert stopping_signals is not None, ( + 'Internal Error: `signals` is missing.') + + tpu_estimator_spec = self._call_model_fn( + features, labels, is_export_mode=False) + if not isinstance(tpu_estimator_spec, TPUEstimatorSpec): + raise RuntimeError( + 'estimator_spec used by TPU prediction must have type' + '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec))) + + captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn) + to_record = {} + identity_fn = lambda **kwargs: kwargs + # TODO(xiejw): Adds validation for prediction dictionrary. + # TODO(xiejw): Adds support for single tensor as predictions. + if not isinstance(tpu_estimator_spec.predictions, dict): + raise TypeError('TPUEstimatorSpec.predictions must be dict of Tensors.') + to_record['predictions'] = [identity_fn, tpu_estimator_spec.predictions] + to_record['signals'] = [identity_fn, stopping_signals] + if tpu_estimator_spec.host_call is not None: + to_record['host_call'] = tpu_estimator_spec.host_call + host_calls.record(to_record) + + with ops.control_dependencies(host_calls.create_enqueue_op()): + return _StopSignals.as_scalar_stopping_signal(stopping_signals) + + return predict_step, host_calls, captured_scaffold_fn + + def _call_model_fn(self, features, labels, is_export_mode=True): """Calls the model_fn with required parameters.""" model_fn_args = util.fn_args(self._model_fn) kwargs = {} @@ -1259,7 +1448,7 @@ class _ModelFnWrapper(object): params[_BATCH_SIZE_KEY] = batch_size_for_model_fn estimator_spec = self._model_fn(features=features, **kwargs) - if (self._ctx.is_running_on_cpu() and + if (self._ctx.is_running_on_cpu(is_export_mode) and isinstance(estimator_spec, TPUEstimatorSpec)): # The estimator_spec will be passed to `Estimator` directly, which expects # type `EstimatorSpec`. @@ -1614,6 +1803,7 @@ class TPUEstimator(estimator_lib.Estimator): use_tpu=True, train_batch_size=None, eval_batch_size=None, + predict_batch_size=None, batch_axis=None): """Constructs an `TPUEstimator` instance. @@ -1639,7 +1829,9 @@ class TPUEstimator(estimator_lib.Estimator): size, as params['batch_size'], when calling `input_fn` and `model_fn`. Cannot be `None` if `use_tpu` is `True`. Must be divisible by `config.tpu_config.num_shards`. - eval_batch_size: An int representing the global training batch size. + eval_batch_size: An int representing evaluation batch size. + Must be divisible by `config.tpu_config.num_shards`. + predict_batch_size: An int representing the prediction batch size. Must be divisible by `config.tpu_config.num_shards`. batch_axis: A python tuple of int values describing how each tensor produced by the Estimator `input_fn` should be split across the TPU @@ -1689,6 +1881,16 @@ class TPUEstimator(estimator_lib.Estimator): 'eval batch size {} must be divisible by number of shards {}' .format(eval_batch_size, config.tpu_config.num_shards)) + if predict_batch_size is not None: + if not isinstance(predict_batch_size, int): + raise ValueError('`predict_batch_size` must be an int') + if predict_batch_size < 1: + raise ValueError('`predict_batch_size` must be positive') + if predict_batch_size % config.tpu_config.num_shards != 0: + raise ValueError( + 'predict batch size {} must be divisible by number of shards {}' + .format(predict_batch_size, config.tpu_config.num_shards)) + # Verifies the model_fn signature according to Estimator framework. estimator_lib._verify_model_fn_args(model_fn, params) # pylint: disable=protected-access # We cannot store config and params in this constructor as parent @@ -1708,7 +1910,7 @@ class TPUEstimator(estimator_lib.Estimator): # All properties passed to _TPUContext are immutable. self._ctx = _TPUContext(self._config, train_batch_size, eval_batch_size, - use_tpu) + predict_batch_size, use_tpu) def _create_global_step(self, graph): """Creates a global step suitable for TPUs. @@ -1804,7 +2006,9 @@ class TPUEstimator(estimator_lib.Estimator): if batch_size_for_input_fn is not None: kwargs['params'][_BATCH_SIZE_KEY] = batch_size_for_input_fn - if ctx.is_running_on_cpu(): + # For export_savedmodel, input_fn is never passed to Estimator. So, + # `is_export_mode` must be False. + if ctx.is_running_on_cpu(is_export_mode=False): with ops.device('/device:CPU:0'): return input_fn(**kwargs) @@ -1831,8 +2035,13 @@ class TPUEstimator(estimator_lib.Estimator): with self._ctx.with_mode(mode) as ctx: model_fn_wrapper = _ModelFnWrapper(model_fn, config, params, ctx) - # TODO(jhseu): Move to PREDICT to TPU. - if ctx.is_running_on_cpu(): + # For export_savedmodel, input_fn is never passed to Estimator. So, + # if features is callable, it means it is the input_fn passed by + # TPUEstimator._call_input_fn. Then we can know if the mode == PREDICT, + # it implies, it is the .predict API, not export_savedmodel API. + is_export_mode = not callable(features) + + if ctx.is_running_on_cpu(is_export_mode=is_export_mode): logging.info('Running %s on CPU', mode) return model_fn_wrapper.call_without_tpu(features, labels) @@ -1881,53 +2090,114 @@ class TPUEstimator(estimator_lib.Estimator): train_op=control_flow_ops.group(*update_ops), scaffold=scaffold) - # Now eval. - total_loss, host_calls, scaffold = _eval_on_tpu_system( + if mode == model_fn_lib.ModeKeys.EVAL: + total_loss, host_calls, scaffold = _eval_on_tpu_system( + ctx, model_fn_wrapper, dequeue_fn) + iterations_per_loop_var = _create_or_get_iterations_per_loop() + mean_loss = math_ops.div(total_loss, + math_ops.cast( + iterations_per_loop_var, + dtype=total_loss.dtype)) + + # Creates a dummy metric update_op for all metrics. Estimator expects + # all metrics in eval_metric_ops have update_op and calls them one by + # one. The real metric update_ops are invoked in a separated thread. + # So, here give Estimator the dummy op for all metrics. + with ops.control_dependencies([mean_loss]): + # After TPU evaluation computation is done (the mean_loss tensor), + # reads all variables back from TPU and updates the eval step + # counter properly + internal_ops_to_run = _sync_variables_ops() + internal_ops_to_run.append( + _increase_eval_step_op(iterations_per_loop_var)) + with ops.control_dependencies(internal_ops_to_run): + dummy_update_op = control_flow_ops.no_op() + + host_call_ret = host_calls.create_tpu_hostcall() + eval_metric_ops = {} + eval_update_ops = [] + for k, v in host_call_ret['eval_metrics'].items(): + eval_metric_ops[k] = (v[0], dummy_update_op) + eval_update_ops.append(v[1]) + + if 'host_call' not in host_call_ret: + host_ops = [] + else: + host_ops = host_call_ret['host_call'] + hooks = [ + TPUInfeedOutfeedSessionHook( + ctx, + enqueue_ops, + eval_update_ops + host_ops, + run_infeed_loop_on_coordinator=( + run_infeed_loop_on_coordinator)), + ] + input_hooks + + return model_fn_lib.EstimatorSpec( + mode, + loss=mean_loss, + evaluation_hooks=hooks, + eval_metric_ops=eval_metric_ops, + scaffold=scaffold) + + # Predict + assert mode == model_fn_lib.ModeKeys.PREDICT + + dummy_predict_op, host_calls, scaffold = _predict_on_tpu_system( ctx, model_fn_wrapper, dequeue_fn) - iterations_per_loop_var = _create_or_get_iterations_per_loop() - mean_loss = math_ops.div(total_loss, - math_ops.cast( - iterations_per_loop_var, - dtype=total_loss.dtype)) - - # Creates a dummy metric update_op for all metrics. Estimator expects - # all metrics in eval_metric_ops have update_op and calls them one by - # one. The real metric update_ops are invoked in a separated thread. So, - # here give Estimator the dummy op for all metrics. - with ops.control_dependencies([mean_loss]): - # After TPU evaluation computation is done (the mean_loss tensor), - # reads all variables back from TPU and updates the eval step counter - # properly + with ops.control_dependencies([dummy_predict_op]): internal_ops_to_run = _sync_variables_ops() - internal_ops_to_run.append( - _increase_eval_step_op(iterations_per_loop_var)) with ops.control_dependencies(internal_ops_to_run): - dummy_update_op = control_flow_ops.no_op() + dummy_predict_op = control_flow_ops.no_op() + + # In train and evaluation, the main TPU program is passed to monitored + # training session to run. Infeed enqueue and outfeed dequeue are + # executed in side threads. This is not the configuration for + # prediction mode. + # + # For prediction, the Estimator executes the EstimatorSpec.predictions + # directly and yield the element (via generator) to call site. So, the + # outfeed based prediction must be passed to MonitoredSession directly. + # Other parts of the TPU execution are organized as follows. + # + # 1. All outfeed based Tensors must be grouped with predictions Tensors + # to form a single invocation. This avoid the issue we might trigger + # multiple outfeeds incorrectly. To achieve this, `host_call` is + # placed in control_dependencies of `stopping_signals`, and + # `stopping_signals` is passed into _StoppingPredictHook, which sets + # the `stopping_signals` as SessionRunArgs. MonitoredSession merges + # all SessionRunArgs with the fetch in session.run together. + # + # 2. The TPU program (dummy_predict_op) and enqueue_ops (infeed Enqueue) + # are grouped together. They will be launched once and only once in + # side threads and they quit naturally according to the SAME stopping + # condition. + enqueue_ops.append(dummy_predict_op) host_call_ret = host_calls.create_tpu_hostcall() - eval_metric_ops = {} - eval_update_ops = [] - for k, v in host_call_ret['eval_metrics'].items(): - eval_metric_ops[k] = (v[0], dummy_update_op) - eval_update_ops.append(v[1]) - if 'host_call' not in host_call_ret: host_ops = [] else: host_ops = host_call_ret['host_call'] + + predictions = host_call_ret['predictions'] + stopping_signals = host_call_ret['signals'] + + with ops.control_dependencies(host_ops): + host_ops = [] # Empty, we do do not need it anymore. + scalar_stopping_signal = _StopSignals.as_scalar_stopping_signal( + stopping_signals) + hooks = [ - TPUInfeedOutfeedSessionHook( - ctx, - enqueue_ops, - eval_update_ops + host_ops, - run_infeed_loop_on_coordinator=run_infeed_loop_on_coordinator), + _StoppingPredictHook(scalar_stopping_signal), + TPUInfeedOutfeedSessionHookForPrediction(ctx, enqueue_ops, + host_ops), ] + input_hooks return model_fn_lib.EstimatorSpec( mode, - loss=mean_loss, - evaluation_hooks=hooks, - eval_metric_ops=eval_metric_ops, + prediction_hooks=hooks, + predictions=predictions, scaffold=scaffold) return _model_fn @@ -1981,6 +2251,34 @@ def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn): return loss, host_call, scaffold +def _predict_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn): + """Executes `model_fn_wrapper` multiple times on all TPU shards.""" + num_cores = ctx.num_cores + + single_tpu_predict_step, host_calls, captured_scaffold_fn = ( + model_fn_wrapper.convert_to_single_tpu_predict_step(dequeue_fn)) + + def multi_tpu_predict_steps_on_single_shard(): + + def cond(scalar_stopping_signal): + return math_ops.logical_not( + _StopSignals.should_stop(scalar_stopping_signal)) + + inputs = [_StopSignals.NON_STOPPING_SIGNAL] + outputs = training_loop.while_loop( + cond, single_tpu_predict_step, inputs=inputs, name=b'loop') + return outputs + + (dummy_predict_op,) = tpu.shard( + multi_tpu_predict_steps_on_single_shard, + inputs=[], + num_shards=num_cores, + outputs_from_all_shards=False) + + scaffold = _get_scaffold(captured_scaffold_fn) + return dummy_predict_op, host_calls, scaffold + + def _wrap_computation_in_while_loop(device, op_fn): """Wraps the ops generated by `op_fn` in tf.while_loop.""" @@ -1999,6 +2297,29 @@ def _wrap_computation_in_while_loop(device, op_fn): parallel_iterations=1) +def _wrap_computation_in_while_loop_with_stopping_signals(device, op_fn): + """Wraps the ops generated by `op_fn` in tf.while_loop.""" + + def cond(scalar_stopping_signal): + return math_ops.logical_not( + _StopSignals.should_stop(scalar_stopping_signal)) + + def computation(unused_scalar_stopping_signal): + return_value = op_fn() + execute_ops = return_value['ops'] + signals = return_value['signals'] + with ops.control_dependencies(execute_ops): + return _StopSignals.as_scalar_stopping_signal(signals) + + # By setting parallel_iterations=1, the parallel execution in while_loop is + # basically turned off. + with ops.device(device): + return control_flow_ops.while_loop( + cond, + computation, [_StopSignals.NON_STOPPING_SIGNAL], + parallel_iterations=1) + + def _validate_tpu_training_graph(): """Validate graph before running distributed training. @@ -2091,21 +2412,22 @@ class _CapturingContext(control_flow_ops.ControlFlowContext): self._g._set_control_flow_context(self._old) # pylint: disable=protected-access -# TODO(xiejw): Extend this to support internal signal. class _Inputs(object): """A data structure representing the input_fn returned values. This also supports the returned value from input_fn as `Dataset`. """ - def __init__(self, features=None, labels=None, dataset=None): - if dataset is not None and (features is not None or labels is not None): + def __init__(self, features=None, labels=None, dataset=None, signals=None): + if dataset is not None and (features is not None or labels is not None or + signals is not None): raise RuntimeError('Internal Error: Either (features and labels) or ' 'dataset should be provided, not both. Please file ' 'bug') self._features = features self._labels = labels + self._signals = signals self._dataset = dataset self._iterator = None @@ -2117,11 +2439,16 @@ class _Inputs(object): dataset = return_values return _Inputs(dataset=dataset) + features, labels = _Inputs._parse_inputs(return_values) + return _Inputs(features, labels) + + @staticmethod + def _parse_inputs(return_values): if isinstance(return_values, tuple): features, labels = return_values else: features, labels = return_values, None - return _Inputs(features, labels) + return features, labels @property def is_dataset(self): @@ -2142,7 +2469,135 @@ class _Inputs(object): def features_and_labels(self): """Gets `features` and `labels`.""" if self.is_dataset: - return (_Inputs.from_input_fn( - self._iterator.get_next()).features_and_labels()) + return _Inputs._parse_inputs(self._iterator.get_next()) return (self._features, self._labels) + + def signals(self): + return self._signals + + @property + def dataset(self): + return self._dataset + + +# TODO(xiejw): Extend this to support final partial batch. +class _InputsWithStoppingSignals(_Inputs): + """Inputs with `_StopSignals` inserted into the dataset.""" + + def __init__(self, dataset, batch_size): + + assert dataset is not None + + user_provided_dataset = dataset.map( + _InputsWithStoppingSignals.insert_stopping_signal( + stop=False, batch_size=batch_size)) + final_batch_dataset = dataset.take(1).map( + _InputsWithStoppingSignals.insert_stopping_signal( + stop=True, batch_size=batch_size)) + dataset = user_provided_dataset.concatenate(final_batch_dataset).prefetch(2) + + super(_InputsWithStoppingSignals, self).__init__(dataset=dataset) + self._current_inputs = None + + def features_and_labels(self): + if self._current_inputs is not None: + raise RuntimeError( + 'Internal Error: The previous inputs have not been properly ' + 'consumed. First call features_and_labels, then call signals.') + + inputs_with_signals = self._iterator.get_next() + features = inputs_with_signals['features'] + labels = inputs_with_signals.get('labels') + + self._current_inputs = inputs_with_signals + return features, labels + + def signals(self): + """Returns the `Signals` from `_Inputs`.""" + if self._current_inputs is None: + raise RuntimeError( + 'Internal Error: The current inputs have not been properly ' + 'generated. First call features_and_labels, then call signals.') + signals = self._current_inputs['signals'] + self._current_inputs = None + return signals + + @staticmethod + def insert_stopping_signal(stop, batch_size): + """Inserts stopping_signal into dataset via _map_fn. + + Here we change the data structure in the dataset, such that the return value + is a dictionary now and `features`, `labels`, and `signals` are three + distinguished keys in that dict. This provides a better structure, which + eases the process to decompose the inputs (see `features_and_labels`). + + Args: + stop: bool, state of current stopping signals. + batch_size: int, batch size. + + Returns: + A map_fn passed to dataset.map API. + """ + + def _map_fn(*args): + features, labels = _Inputs._parse_inputs(args) + new_input_dict = {} + new_input_dict['features'] = features + if labels is not None: + new_input_dict['labels'] = labels + new_input_dict['signals'] = _StopSignals( + stop=stop, batch_size=batch_size).as_dict() + return new_input_dict + + return _map_fn + + +class _StopSignals(object): + """Signals class holding all logic to handle TPU stopping condition.""" + + NON_STOPPING_SIGNAL = 0.0 + STOPPING_SIGNAL = 1.0 + + def __init__(self, stop, batch_size): + self._stop = stop + self._batch_size = batch_size + + def as_dict(self): + shape = [self._batch_size, 1] + dtype = dtypes.float32 + + if self._stop: + stopping = array_ops.ones(shape=shape, dtype=dtype) + else: + stopping = array_ops.zeros(shape=shape, dtype=dtype) + + return {'stopping': stopping} + + @staticmethod + def as_scalar_stopping_signal(signals): + return array_ops.identity(signals['stopping'][0][0]) + + @staticmethod + def should_stop(scalar_stopping_signal): + return scalar_stopping_signal >= _StopSignals.STOPPING_SIGNAL + + +class _SignalsHelper(object): + """A general helper class to handle common signals manipulation.""" + + def __init__(self, signals): + self._signal_keys = [] + for key in sorted(signals.iterkeys()): + self._signal_keys.append(key) + + @property + def num_signals(self): + return len(self._signal_keys) + + def unflatten(self, tensor_list): + return dict(zip(self._signal_keys, tensor_list)) + + @staticmethod + def as_tensor_list(signals): + return [signals[key] for key in sorted(signals.iterkeys())] -- GitLab From 7785a8ebf417d0e867a08cabf2d42bb9b29dcb98 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Fri, 2 Feb 2018 16:35:10 -0800 Subject: [PATCH 043/629] Update global_step by default if the user specifies a host_call. PiperOrigin-RevId: 184352399 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index b5082fc823..56793f11d9 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -63,6 +63,7 @@ from tensorflow.python.training import evaluation from tensorflow.python.training import session_run_hook from tensorflow.python.training import training from tensorflow.python.training import training_util +from tensorflow.python.util import tf_inspect _INITIAL_LOSS = 1e7 _ZERO_LOSS = 0. @@ -484,7 +485,7 @@ class TPUEstimatorSpec( if self.eval_metrics is not None: host_calls['eval_metrics'] = self.eval_metrics if self.host_call is not None: - host_calls['host_call'] = self.host_call + host_calls['host_call'] = wrap_hostcall_with_global_step(self.host_call) host_call_ret = _OutfeedHostCall.create_cpu_hostcall(host_calls) eval_metric_ops = None if self.eval_metrics is not None: @@ -1306,7 +1307,9 @@ class _ModelFnWrapper(object): if isinstance(estimator_spec, TPUEstimatorSpec): captured_scaffold_fn.capture(estimator_spec.scaffold_fn) if estimator_spec.host_call is not None: - host_call.record({'host_call': estimator_spec.host_call}) + host_call.record({ + 'host_call': wrap_hostcall_with_global_step( + estimator_spec.host_call)}) host_call_outfeed_ops = host_call.create_enqueue_op() else: captured_scaffold_fn.capture(None) @@ -1361,6 +1364,8 @@ class _ModelFnWrapper(object): to_record = {} to_record['eval_metrics'] = tpu_estimator_spec.eval_metrics if tpu_estimator_spec.host_call is not None: + # We assume that evaluate won't update global step, so we don't wrap + # this host_call. to_record['host_call'] = tpu_estimator_spec.host_call host_calls.record(to_record) @@ -1503,11 +1508,14 @@ class _OutfeedHostCall(object): raise ValueError('{}[1] should be tuple or list, or dict.'.format(name)) if isinstance(host_call[1], (tuple, list)): + fullargspec = tf_inspect.getfullargspec(host_call[0]) fn_args = util.fn_args(host_call[0]) - if len(host_call[1]) != len(fn_args): + # wrapped_hostcall_with_global_step uses varargs, so we allow that. + if fullargspec.varargs is None and len(host_call[1]) != len(fn_args): raise RuntimeError( - 'In TPUEstimatorSpec.{}, length of tensors does not ' - 'match method args of metric_fn.'.format(name)) + 'In TPUEstimatorSpec.{}, length of tensors {} does not match ' + 'method args of the function, which takes {}.'.format( + name, len(host_call[1]), len(fn_args))) @staticmethod def create_cpu_hostcall(host_calls): @@ -1649,6 +1657,38 @@ class _OutfeedHostCall(object): return ret +def wrap_hostcall_with_global_step(hostcall): + """Wrap the hostcall so that we update the global step upon every call.""" + if hostcall is None: + return None + host_fn, tensors = hostcall + + def global_step_host_fn(_global_step, *args, **kwargs): # pylint: disable=invalid-name + # Note that we don't have any ordering here, so the graph may see a + # global_step that's off by 1. + state_ops.assign( + training.get_global_step(), + math_ops.cast(_global_step[0], dtypes.int64)) + return host_fn(*args, **kwargs) + # Give the global step tensor a batch dimension. Reshape is not supported for + # int64, so we cast it to int32. + # TODO(jhseu): Remove the cast once int64 is supported. + global_step_tensor = array_ops.reshape( + math_ops.cast(training.get_global_step(), dtypes.int32), [1]) + if isinstance(tensors, dict): + outfeed_tensors = {'_global_step': global_step_tensor} + outfeed_tensors.update(tensors) + return global_step_host_fn, outfeed_tensors + else: + fn_args = util.fn_args(host_fn) + if len(tensors) != len(fn_args): + raise RuntimeError( + 'In TPUEstimatorSpec.host_call, length of tensors {} does not match ' + 'method args of the function, which takes {}.'.format( + len(tensors), len(fn_args))) + return global_step_host_fn, [global_step_tensor] + list(tensors) + + class _OutfeedHostCallHook(session_run_hook.SessionRunHook): """Hook to run host calls when use_tpu=False.""" -- GitLab From 359ab22b7972802e19fe7949ebd945a59998f549 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Sat, 3 Feb 2018 09:05:25 -0800 Subject: [PATCH 044/629] skyewm: Fix Python3 crazy SessionTest.testReentryWithCApi failure. --- tensorflow/python/client/session.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index 6befeb846d..f3c4fecdc0 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -1539,8 +1539,22 @@ class Session(BaseSession): def __exit__(self, exec_type, exec_value, exec_tb): if exec_type is errors.OpError: logging.error('Session closing due to OpError: %s', (exec_value,)) - self._default_session_context_manager.__exit__(exec_type, exec_value, - exec_tb) + try: + self._default_session_context_manager.__exit__(exec_type, exec_value, + exec_tb) + except RuntimeError as error: + if error == exec_value: + # NOTE(skyewm): for some reason, in Python3, + # _default_session_context_manager.__exit__ will re-raise the "not + # re-entrant" exception raised in __enter__ above (note that if we're + # here, we're in the outer session context manager, since __exit__ is + # not called when __enter__ raises an exception). We still want to + # continue cleaning up this context manager before the exception is + # further propagated, so we ignore it here (note that it'll continue + # being propagated after this method completes). + pass + else: + raise self._default_graph_context_manager.__exit__(exec_type, exec_value, exec_tb) self._default_session_context_manager = None -- GitLab From b23908b41ebad5dbe86255c2f196641e42490b2f Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Sat, 3 Feb 2018 09:17:55 -0800 Subject: [PATCH 045/629] Disabling keras io_utils_test on Windows. --- tensorflow/contrib/cmake/tf_tests.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 2e79eadf7f..73edd616ea 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -310,6 +310,8 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/kernel_tests/control_flow_util_test.py" # Flaky replicate_model_fn_test "${tensorflow_source_dir}/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py" # b/71901810 + # Broken io_utils_test + "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/utils/io_utils_test.py" # b/72894325 ) endif() list(REMOVE_ITEM tf_test_src_py ${tf_test_src_py_exclude}) -- GitLab From 6842913e9b56baa88c11f188e29e13466be9ed86 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Sat, 3 Feb 2018 09:46:13 -0800 Subject: [PATCH 046/629] Delete device_functions.h include. --- tensorflow/core/util/cuda_device_functions.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h index f787687f66..d67663c669 100644 --- a/tensorflow/core/util/cuda_device_functions.h +++ b/tensorflow/core/util/cuda_device_functions.h @@ -29,7 +29,6 @@ limitations under the License. #include #include #include "cuda/include/cuda.h" -#include "cuda/include/device_functions.h" #include "tensorflow/core/platform/types.h" #if CUDA_VERSION >= 7050 -- GitLab From 7946a0b1d9998cc54cb952d538668883d3fd8181 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Sat, 3 Feb 2018 22:02:53 -0800 Subject: [PATCH 047/629] Fix the Windows GPU build #2 --- tensorflow/core/util/cuda_device_functions.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h index d67663c669..525bef16a0 100644 --- a/tensorflow/core/util/cuda_device_functions.h +++ b/tensorflow/core/util/cuda_device_functions.h @@ -31,10 +31,6 @@ limitations under the License. #include "cuda/include/cuda.h" #include "tensorflow/core/platform/types.h" -#if CUDA_VERSION >= 7050 -#include "cuda/include/cuda_fp16.h" -#endif // CUDA_VERSION >= 7050 - namespace tensorflow { namespace detail { -- GitLab From 232fec7e52e52053a0e04c8919e7ece5c654d2de Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Sun, 4 Feb 2018 09:28:59 -0800 Subject: [PATCH 048/629] Fix for_canonicalization_test --- tensorflow/contrib/py2tf/converters/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 3853c60f99..fc6781d50e 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -131,6 +131,7 @@ py_test( py_test( name = "for_canonicalization_test", srcs = ["for_canonicalization_test.py"], + srcs_version = "PY2AND3", deps = [ ":test_lib", "//tensorflow/contrib/py2tf/pyct", -- GitLab From 573c6f40a90ace2bc921738937fea32fdf724f7b Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Mon, 5 Feb 2018 13:36:22 -0800 Subject: [PATCH 049/629] Bump the required numpy version in r1.6 --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 2002786999..fe2c22f2f5 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -36,7 +36,7 @@ REQUIRED_PACKAGES = [ 'astor >= 0.6.0', 'gast >= 0.2.0', 'grpcio >= 1.8.6', - 'numpy >= 1.12.1', + 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', 'tensorflow-tensorboard >= 1.5.0, < 1.6.0', -- GitLab From cf03e3dce149395f24176f0898bd6f0a5b4e5916 Mon Sep 17 00:00:00 2001 From: Tim H Date: Wed, 7 Feb 2018 13:22:05 +0100 Subject: [PATCH 050/629] make ImageClassifier abstract and introduce new concrete subclasses --- .../src/main/assets/labels_imagenet_slim.txt | 1001 +++++++++++++++++ ....txt => labels_mobilenet_quant_v1_224.txt} | 0 .../Camera2BasicFragment.java | 5 +- .../tflitecamerademo/ImageClassifier.java | 104 +- .../ImageClassifierFloatInception.java | 85 ++ .../ImageClassifierQuantizedMobileNet.java | 79 ++ 6 files changed, 1245 insertions(+), 29 deletions(-) create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/assets/labels_imagenet_slim.txt rename tensorflow/contrib/lite/java/demo/app/src/main/assets/{labels.txt => labels_mobilenet_quant_v1_224.txt} (100%) create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java create mode 100644 tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels_imagenet_slim.txt b/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels_imagenet_slim.txt new file mode 100644 index 0000000000..572eccf900 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels_imagenet_slim.txt @@ -0,0 +1,1001 @@ +dummy +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenter's kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o'-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potter's wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow lady's slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels.txt b/tensorflow/contrib/lite/java/demo/app/src/main/assets/labels_mobilenet_quant_v1_224.txt similarity index 100% rename from tensorflow/contrib/lite/java/demo/app/src/main/assets/labels.txt rename to tensorflow/contrib/lite/java/demo/app/src/main/assets/labels_mobilenet_quant_v1_224.txt diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java index 74737a8b88..d2048b41b1 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java @@ -296,7 +296,8 @@ public class Camera2BasicFragment extends Fragment public void onActivityCreated(Bundle savedInstanceState) { super.onActivityCreated(savedInstanceState); try { - classifier = new ImageClassifier(getActivity()); + // create either a new ImageClassifierQuantizedMobileNet or an ImageClassifierFloatInception + classifier = new ImageClassifierQuantizedMobileNet(getActivity()); } catch (IOException e) { Log.e(TAG, "Failed to initialize an image classifier."); } @@ -659,7 +660,7 @@ public class Camera2BasicFragment extends Fragment return; } Bitmap bitmap = - textureView.getBitmap(ImageClassifier.DIM_IMG_SIZE_X, ImageClassifier.DIM_IMG_SIZE_Y); + textureView.getBitmap(classifier.getImageSizeX(), classifier.getImageSizeY()); String textToShow = classifier.classifyFrame(bitmap); bitmap.recycle(); showToast(textToShow); diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java index e44c5ae6b4..a0f1d04983 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java @@ -20,6 +20,9 @@ import android.content.res.AssetFileDescriptor; import android.graphics.Bitmap; import android.os.SystemClock; import android.util.Log; + +import org.tensorflow.lite.Interpreter; + import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; @@ -34,20 +37,15 @@ import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.PriorityQueue; -import org.tensorflow.lite.Interpreter; -/** Classifies images with Tensorflow Lite. */ -public class ImageClassifier { +/** + * Classifies images with Tensorflow Lite. + */ +public abstract class ImageClassifier { /** Tag for the {@link Log}. */ private static final String TAG = "TfLiteCameraDemo"; - /** Name of the model file stored in Assets. */ - private static final String MODEL_PATH = "mobilenet_quant_v1_224.tflite"; - - /** Name of the label file stored in Assets. */ - private static final String LABEL_PATH = "labels.txt"; - /** Number of results to show in the UI. */ private static final int RESULTS_TO_SHOW = 3; @@ -56,11 +54,8 @@ public class ImageClassifier { private static final int DIM_PIXEL_SIZE = 3; - static final int DIM_IMG_SIZE_X = 224; - static final int DIM_IMG_SIZE_Y = 224; - /* Preallocated buffers for storing image data in. */ - private int[] intValues = new int[DIM_IMG_SIZE_X * DIM_IMG_SIZE_Y]; + private int[] intValues = new int[getImageSizeX() * getImageSizeY()]; /** An instance of the driver class to run model inference with Tensorflow Lite. */ private Interpreter tflite; @@ -71,8 +66,12 @@ public class ImageClassifier { /** A ByteBuffer to hold image data, to be feed into Tensorflow Lite as inputs. */ private ByteBuffer imgData = null; - /** An array to hold inference results, to be feed into Tensorflow Lite as outputs. */ - private byte[][] labelProbArray = null; + /** + * An array to hold inference results, to be feed into Tensorflow Lite as outputs. + * For production, you may want to replace the boxed datatype with a primitive one. + */ + protected Number[][] labelProbArray = null; + /** multi-stage low pass filter * */ private float[][] filterLabelProbArray = null; @@ -95,9 +94,10 @@ public class ImageClassifier { labelList = loadLabelList(activity); imgData = ByteBuffer.allocateDirect( - DIM_BATCH_SIZE * DIM_IMG_SIZE_X * DIM_IMG_SIZE_Y * DIM_PIXEL_SIZE); + DIM_BATCH_SIZE * getImageSizeX() * getImageSizeY() * DIM_PIXEL_SIZE * + getNumBytesPerChannel()); imgData.order(ByteOrder.nativeOrder()); - labelProbArray = new byte[1][labelList.size()]; + labelProbArray = createLabelProbArray(labelList.size()); filterLabelProbArray = new float[FILTER_STAGES][labelList.size()]; Log.d(TAG, "Created a Tensorflow Lite Image Classifier."); } @@ -128,9 +128,10 @@ public class ImageClassifier { int numLabels = labelList.size(); // Low pass filter `labelProbArray` into the first stage of the filter. + // TODO labelProbArray[0][j].floatValue() for (int j = 0; j < numLabels; ++j) { filterLabelProbArray[0][j] += - FILTER_FACTOR * (labelProbArray[0][j] - filterLabelProbArray[0][j]); + FILTER_FACTOR * (labelProbArray[0][j].floatValue() - filterLabelProbArray[0][j]); } // Low pass filter each stage into the next. for (int i = 1; i < FILTER_STAGES; ++i) { @@ -142,7 +143,7 @@ public class ImageClassifier { // Copy the last stage filter output back to `labelProbArray`. for (int j = 0; j < numLabels; ++j) { - labelProbArray[0][j] = (byte)filterLabelProbArray[FILTER_STAGES - 1][j]; + labelProbArray[0][j] = filterLabelProbArray[FILTER_STAGES - 1][j]; } } @@ -156,7 +157,7 @@ public class ImageClassifier { private List loadLabelList(Activity activity) throws IOException { List labelList = new ArrayList(); BufferedReader reader = - new BufferedReader(new InputStreamReader(activity.getAssets().open(LABEL_PATH))); + new BufferedReader(new InputStreamReader(activity.getAssets().open(getLabelPath()))); String line; while ((line = reader.readLine()) != null) { labelList.add(line); @@ -167,7 +168,7 @@ public class ImageClassifier { /** Memory-map the model file in Assets. */ private MappedByteBuffer loadModelFile(Activity activity) throws IOException { - AssetFileDescriptor fileDescriptor = activity.getAssets().openFd(MODEL_PATH); + AssetFileDescriptor fileDescriptor = activity.getAssets().openFd(getModelPath()); FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor()); FileChannel fileChannel = inputStream.getChannel(); long startOffset = fileDescriptor.getStartOffset(); @@ -185,12 +186,10 @@ public class ImageClassifier { // Convert the image to floating point. int pixel = 0; long startTime = SystemClock.uptimeMillis(); - for (int i = 0; i < DIM_IMG_SIZE_X; ++i) { - for (int j = 0; j < DIM_IMG_SIZE_Y; ++j) { + for (int i = 0; i < getImageSizeX(); ++i) { + for (int j = 0; j < getImageSizeY(); ++j) { final int val = intValues[pixel++]; - imgData.put((byte) ((val >> 16) & 0xFF)); - imgData.put((byte) ((val >> 8) & 0xFF)); - imgData.put((byte) (val & 0xFF)); + addPixelValue(val, imgData); } } long endTime = SystemClock.uptimeMillis(); @@ -201,7 +200,7 @@ public class ImageClassifier { private String printTopKLabels() { for (int i = 0; i < labelList.size(); ++i) { sortedLabels.add( - new AbstractMap.SimpleEntry<>(labelList.get(i), (labelProbArray[0][i] & 0xff) / 255.0f)); + new AbstractMap.SimpleEntry<>(labelList.get(i), getProbability(i))); if (sortedLabels.size() > RESULTS_TO_SHOW) { sortedLabels.poll(); } @@ -214,4 +213,55 @@ public class ImageClassifier { } return textToShow; } + + /** + * Get the name of the model file stored in Assets. + * @return + */ + protected abstract String getModelPath(); + + /** + * Get the name of the label file stored in Assets. + * @return + */ + protected abstract String getLabelPath(); + + /** + * Get the image size along the x axis. + * @return + */ + protected abstract int getImageSizeX(); + + /** + * Get the image size along the y axis. + * @return + */ + protected abstract int getImageSizeY(); + + /** + * Initialize a new value for this.labelProbArray + * @param numLabels The total number of used labels. + * @return + */ + protected abstract Number[][] createLabelProbArray(int numLabels); + + /** + * Get the number of bytes that is used to store a single color channel value. + * @return + */ + protected abstract int getNumBytesPerChannel(); + + /** + * Add pixelValue to byteBuffer. + * @param pixelValue + * @param imgData + */ + protected abstract void addPixelValue(int pixelValue, ByteBuffer imgData); + + /** + * Read the probability value for the specified label from the saved inference result. + * @param labelIndex + * @return + */ + protected abstract float getProbability(int labelIndex); } diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java new file mode 100644 index 0000000000..aee5370fd5 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java @@ -0,0 +1,85 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package com.example.android.tflitecamerademo; + +import android.app.Activity; + +import java.io.IOException; +import java.nio.ByteBuffer; + + +public class ImageClassifierFloatInception extends ImageClassifier { + + /** + * The inception net requires additional normalization of the used input. + */ + private static final int IMAGE_MEAN = 128; + private static final float IMAGE_STD = 128.0f; + + /** + * Initializes an {@code ImageClassifier}. + * + * @param activity + */ + ImageClassifierFloatInception(Activity activity) throws IOException { + super(activity); + } + + @Override + protected String getModelPath() { + // you can download this file from + // https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip + return "inceptionv3_slim_2016.tflite"; + } + + @Override + protected String getLabelPath() { + return "labels_imagenet_slim.txt"; + } + + @Override + protected int getImageSizeX() { + return 299; + } + + @Override + protected int getImageSizeY() { + return 299; + } + + @Override + protected Float[][] createLabelProbArray(int numLabels) { + return new Float[1][numLabels]; + } + + @Override + protected int getNumBytesPerChannel() { + // a 32bit float value requires 4 bytes + return 4; + } + + @Override + protected void addPixelValue(int pixelValue, ByteBuffer imgData) { + imgData.putFloat((((pixelValue >> 16) & 0xFF) - IMAGE_MEAN) / IMAGE_STD); + imgData.putFloat((((pixelValue >> 8) & 0xFF) - IMAGE_MEAN) / IMAGE_STD); + imgData.putFloat(((pixelValue & 0xFF) - IMAGE_MEAN) / IMAGE_STD); + } + + @Override + protected float getProbability(int labelIndex) { + return (float) labelProbArray[0][labelIndex]; + } +} diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java new file mode 100644 index 0000000000..f841c13af1 --- /dev/null +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java @@ -0,0 +1,79 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package com.example.android.tflitecamerademo; + +import android.app.Activity; + +import java.io.IOException; +import java.nio.ByteBuffer; + + +public class ImageClassifierQuantizedMobileNet extends ImageClassifier { + + /** + * Initializes an {@code ImageClassifier}. + * + * @param activity + */ + ImageClassifierQuantizedMobileNet(Activity activity) throws IOException { + super(activity); + } + + @Override + protected String getModelPath() { + // you can download this file from + // https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip + return "mobilenet_quant_v1_224.tflite"; + } + + @Override + protected String getLabelPath() { + return "labels_mobilenet_quant_v1_224.txt"; + } + + @Override + protected int getImageSizeX() { + return 224; + } + + @Override + protected int getImageSizeY() { + return 224; + } + + @Override + protected Byte[][] createLabelProbArray(int numLabels) { + return new Byte[0][numLabels]; + } + + @Override + protected int getNumBytesPerChannel() { + // the quantized model uses a single byte only + return 1; + } + + @Override + protected void addPixelValue(int pixelValue, ByteBuffer imgData) { + imgData.put((byte) ((pixelValue >> 16) & 0xFF)); + imgData.put((byte) ((pixelValue >> 8) & 0xFF)); + imgData.put((byte) (pixelValue & 0xFF)); + } + + @Override + protected float getProbability(int labelIndex) { + return (((byte)labelProbArray[0][labelIndex] & 0xff) / 255.0f); + } +} -- GitLab From 93eb232010f8d88f3f38567799ce3a9bb10b5fa6 Mon Sep 17 00:00:00 2001 From: Tim H Date: Wed, 7 Feb 2018 14:02:36 +0100 Subject: [PATCH 051/629] refactor labelProbArray, because the TensorFlow interface doesn't support boxed data types --- .../tflitecamerademo/ImageClassifier.java | 56 +++++++++++-------- .../ImageClassifierFloatInception.java | 26 ++++++--- .../ImageClassifierQuantizedMobileNet.java | 26 ++++++--- 3 files changed, 70 insertions(+), 38 deletions(-) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java index a0f1d04983..806ec485ca 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java @@ -58,19 +58,13 @@ public abstract class ImageClassifier { private int[] intValues = new int[getImageSizeX() * getImageSizeY()]; /** An instance of the driver class to run model inference with Tensorflow Lite. */ - private Interpreter tflite; + protected Interpreter tflite; /** Labels corresponding to the output of the vision model. */ private List labelList; /** A ByteBuffer to hold image data, to be feed into Tensorflow Lite as inputs. */ - private ByteBuffer imgData = null; - - /** - * An array to hold inference results, to be feed into Tensorflow Lite as outputs. - * For production, you may want to replace the boxed datatype with a primitive one. - */ - protected Number[][] labelProbArray = null; + protected ByteBuffer imgData = null; /** multi-stage low pass filter * */ private float[][] filterLabelProbArray = null; @@ -97,7 +91,6 @@ public abstract class ImageClassifier { DIM_BATCH_SIZE * getImageSizeX() * getImageSizeY() * DIM_PIXEL_SIZE * getNumBytesPerChannel()); imgData.order(ByteOrder.nativeOrder()); - labelProbArray = createLabelProbArray(labelList.size()); filterLabelProbArray = new float[FILTER_STAGES][labelList.size()]; Log.d(TAG, "Created a Tensorflow Lite Image Classifier."); } @@ -111,7 +104,7 @@ public abstract class ImageClassifier { convertBitmapToByteBuffer(bitmap); // Here's where the magic happens!!! long startTime = SystemClock.uptimeMillis(); - tflite.run(imgData, labelProbArray); + runInference(); long endTime = SystemClock.uptimeMillis(); Log.d(TAG, "Timecost to run model inference: " + Long.toString(endTime - startTime)); @@ -125,13 +118,12 @@ public abstract class ImageClassifier { } void applyFilter() { - int numLabels = labelList.size(); + int numLabels = getNumLabels(); // Low pass filter `labelProbArray` into the first stage of the filter. - // TODO labelProbArray[0][j].floatValue() for (int j = 0; j < numLabels; ++j) { filterLabelProbArray[0][j] += - FILTER_FACTOR * (labelProbArray[0][j].floatValue() - filterLabelProbArray[0][j]); + FILTER_FACTOR * (getProbability(j) - filterLabelProbArray[0][j]); } // Low pass filter each stage into the next. for (int i = 1; i < FILTER_STAGES; ++i) { @@ -143,7 +135,7 @@ public abstract class ImageClassifier { // Copy the last stage filter output back to `labelProbArray`. for (int j = 0; j < numLabels; ++j) { - labelProbArray[0][j] = filterLabelProbArray[FILTER_STAGES - 1][j]; + setProbability(j, filterLabelProbArray[FILTER_STAGES - 1][j]); } } @@ -189,7 +181,7 @@ public abstract class ImageClassifier { for (int i = 0; i < getImageSizeX(); ++i) { for (int j = 0; j < getImageSizeY(); ++j) { final int val = intValues[pixel++]; - addPixelValue(val, imgData); + addPixelValue(val); } } long endTime = SystemClock.uptimeMillis(); @@ -238,13 +230,6 @@ public abstract class ImageClassifier { */ protected abstract int getImageSizeY(); - /** - * Initialize a new value for this.labelProbArray - * @param numLabels The total number of used labels. - * @return - */ - protected abstract Number[][] createLabelProbArray(int numLabels); - /** * Get the number of bytes that is used to store a single color channel value. * @return @@ -254,9 +239,8 @@ public abstract class ImageClassifier { /** * Add pixelValue to byteBuffer. * @param pixelValue - * @param imgData */ - protected abstract void addPixelValue(int pixelValue, ByteBuffer imgData); + protected abstract void addPixelValue(int pixelValue); /** * Read the probability value for the specified label from the saved inference result. @@ -264,4 +248,28 @@ public abstract class ImageClassifier { * @return */ protected abstract float getProbability(int labelIndex); + + /** + * Set the probability value for the specified label. + * @param labelIndex + * @param value + */ + protected abstract void setProbability(int labelIndex, Number value); + + /** + * Run inference using the prepared input in this.imgData. + * The result will be saved in this.labelPropArray. + * + * This additional method is necessary, because we don't have a common base for different + * primitive data types. + */ + protected abstract void runInference(); + + /** + * Get the total number of labels. + * @return + */ + protected int getNumLabels() { + return labelList.size(); + } } diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java index aee5370fd5..9dac61d3c4 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java @@ -29,6 +29,12 @@ public class ImageClassifierFloatInception extends ImageClassifier { private static final int IMAGE_MEAN = 128; private static final float IMAGE_STD = 128.0f; + /** + * An array to hold inference results, to be feed into Tensorflow Lite as outputs. + * This isn't part of the super class, because we need a primitive array here. + */ + protected float[][] labelProbArray = null; + /** * Initializes an {@code ImageClassifier}. * @@ -36,6 +42,7 @@ public class ImageClassifierFloatInception extends ImageClassifier { */ ImageClassifierFloatInception(Activity activity) throws IOException { super(activity); + labelProbArray = new float[1][getNumLabels()]; } @Override @@ -60,11 +67,6 @@ public class ImageClassifierFloatInception extends ImageClassifier { return 299; } - @Override - protected Float[][] createLabelProbArray(int numLabels) { - return new Float[1][numLabels]; - } - @Override protected int getNumBytesPerChannel() { // a 32bit float value requires 4 bytes @@ -72,7 +74,7 @@ public class ImageClassifierFloatInception extends ImageClassifier { } @Override - protected void addPixelValue(int pixelValue, ByteBuffer imgData) { + protected void addPixelValue(int pixelValue) { imgData.putFloat((((pixelValue >> 16) & 0xFF) - IMAGE_MEAN) / IMAGE_STD); imgData.putFloat((((pixelValue >> 8) & 0xFF) - IMAGE_MEAN) / IMAGE_STD); imgData.putFloat(((pixelValue & 0xFF) - IMAGE_MEAN) / IMAGE_STD); @@ -80,6 +82,16 @@ public class ImageClassifierFloatInception extends ImageClassifier { @Override protected float getProbability(int labelIndex) { - return (float) labelProbArray[0][labelIndex]; + return labelProbArray[0][labelIndex]; + } + + @Override + protected void setProbability(int labelIndex, Number value) { + labelProbArray[0][labelIndex] = value.floatValue(); + } + + @Override + protected void runInference() { + tflite.run(imgData, labelProbArray); } } diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java index f841c13af1..3ca621f900 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java @@ -23,6 +23,12 @@ import java.nio.ByteBuffer; public class ImageClassifierQuantizedMobileNet extends ImageClassifier { + /** + * An array to hold inference results, to be feed into Tensorflow Lite as outputs. + * This isn't part of the super class, because we need a primitive array here. + */ + protected byte[][] labelProbArray = null; + /** * Initializes an {@code ImageClassifier}. * @@ -30,6 +36,7 @@ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { */ ImageClassifierQuantizedMobileNet(Activity activity) throws IOException { super(activity); + labelProbArray = new byte[1][getNumLabels()]; } @Override @@ -54,11 +61,6 @@ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { return 224; } - @Override - protected Byte[][] createLabelProbArray(int numLabels) { - return new Byte[0][numLabels]; - } - @Override protected int getNumBytesPerChannel() { // the quantized model uses a single byte only @@ -66,7 +68,7 @@ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { } @Override - protected void addPixelValue(int pixelValue, ByteBuffer imgData) { + protected void addPixelValue(int pixelValue) { imgData.put((byte) ((pixelValue >> 16) & 0xFF)); imgData.put((byte) ((pixelValue >> 8) & 0xFF)); imgData.put((byte) (pixelValue & 0xFF)); @@ -74,6 +76,16 @@ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { @Override protected float getProbability(int labelIndex) { - return (((byte)labelProbArray[0][labelIndex] & 0xff) / 255.0f); + return ((labelProbArray[0][labelIndex] & 0xff) / 255.0f); + } + + @Override + protected void setProbability(int labelIndex, Number value) { + labelProbArray[0][labelIndex] = value.byteValue(); + } + + @Override + protected void runInference() { + tflite.run(imgData, labelProbArray); } } -- GitLab From b0f9f8acb31d121670bb836167217cbdf403dacb Mon Sep 17 00:00:00 2001 From: Tim H Date: Wed, 7 Feb 2018 14:33:19 +0100 Subject: [PATCH 052/629] bugfix --- .../tflitecamerademo/ImageClassifier.java | 17 +++++++++++++---- .../ImageClassifierFloatInception.java | 8 +++++++- .../ImageClassifierQuantizedMobileNet.java | 9 +++++++-- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java index 806ec485ca..c3f0db1638 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java @@ -91,7 +91,7 @@ public abstract class ImageClassifier { DIM_BATCH_SIZE * getImageSizeX() * getImageSizeY() * DIM_PIXEL_SIZE * getNumBytesPerChannel()); imgData.order(ByteOrder.nativeOrder()); - filterLabelProbArray = new float[FILTER_STAGES][labelList.size()]; + filterLabelProbArray = new float[FILTER_STAGES][getNumLabels()]; Log.d(TAG, "Created a Tensorflow Lite Image Classifier."); } @@ -190,9 +190,9 @@ public abstract class ImageClassifier { /** Prints top-K labels, to be shown in UI as the results. */ private String printTopKLabels() { - for (int i = 0; i < labelList.size(); ++i) { + for (int i = 0; i < getNumLabels(); ++i) { sortedLabels.add( - new AbstractMap.SimpleEntry<>(labelList.get(i), getProbability(i))); + new AbstractMap.SimpleEntry<>(labelList.get(i), getNormalizedProbability(i))); if (sortedLabels.size() > RESULTS_TO_SHOW) { sortedLabels.poll(); } @@ -243,7 +243,9 @@ public abstract class ImageClassifier { protected abstract void addPixelValue(int pixelValue); /** - * Read the probability value for the specified label from the saved inference result. + * Read the probability value for the specified label + * This is either the original value as it was read from the net's output or the updated value + * after the filter was applied. * @param labelIndex * @return */ @@ -256,6 +258,13 @@ public abstract class ImageClassifier { */ protected abstract void setProbability(int labelIndex, Number value); + /** + * Get the normalized probability value for the specified label. + * This is the final value as it will be shown to the user. + * @return + */ + protected abstract float getNormalizedProbability(int labelIndex); + /** * Run inference using the prepared input in this.imgData. * The result will be saved in this.labelPropArray. diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java index 9dac61d3c4..b42feed19e 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java @@ -33,7 +33,7 @@ public class ImageClassifierFloatInception extends ImageClassifier { * An array to hold inference results, to be feed into Tensorflow Lite as outputs. * This isn't part of the super class, because we need a primitive array here. */ - protected float[][] labelProbArray = null; + private float[][] labelProbArray = null; /** * Initializes an {@code ImageClassifier}. @@ -90,6 +90,12 @@ public class ImageClassifierFloatInception extends ImageClassifier { labelProbArray[0][labelIndex] = value.floatValue(); } + @Override + protected float getNormalizedProbability(int labelIndex) { + // nothing to do here + return getProbability(labelIndex); + } + @Override protected void runInference() { tflite.run(imgData, labelProbArray); diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java index 3ca621f900..95f546b8e4 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java @@ -27,7 +27,7 @@ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { * An array to hold inference results, to be feed into Tensorflow Lite as outputs. * This isn't part of the super class, because we need a primitive array here. */ - protected byte[][] labelProbArray = null; + private byte[][] labelProbArray = null; /** * Initializes an {@code ImageClassifier}. @@ -76,7 +76,7 @@ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { @Override protected float getProbability(int labelIndex) { - return ((labelProbArray[0][labelIndex] & 0xff) / 255.0f); + return labelProbArray[0][labelIndex]; } @Override @@ -84,6 +84,11 @@ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { labelProbArray[0][labelIndex] = value.byteValue(); } + @Override + protected float getNormalizedProbability(int labelIndex) { + return (labelProbArray[0][labelIndex] & 0xff) / 255.0f; + } + @Override protected void runInference() { tflite.run(imgData, labelProbArray); -- GitLab From baa131551e27c83b388e52908d4df4f6cb1fb414 Mon Sep 17 00:00:00 2001 From: Tim H Date: Wed, 7 Feb 2018 14:45:07 +0100 Subject: [PATCH 053/629] add TODO --- .../android/tflitecamerademo/ImageClassifierFloatInception.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java index b42feed19e..73252ac3e7 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java @@ -92,7 +92,7 @@ public class ImageClassifierFloatInception extends ImageClassifier { @Override protected float getNormalizedProbability(int labelIndex) { - // nothing to do here + // TODO the following value isn't in [0,1] yet, but may be greater. Why? return getProbability(labelIndex); } -- GitLab From 74ab43971b630a4e5f76cd0b680f55a1a8412163 Mon Sep 17 00:00:00 2001 From: Tim H Date: Wed, 7 Feb 2018 14:55:29 +0100 Subject: [PATCH 054/629] reformat --- .../tflitecamerademo/ImageClassifier.java | 4 +- .../ImageClassifierFloatInception.java | 162 +++++++++--------- .../ImageClassifierQuantizedMobileNet.java | 147 ++++++++-------- 3 files changed, 158 insertions(+), 155 deletions(-) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java index c3f0db1638..c319bff9f1 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java @@ -266,8 +266,8 @@ public abstract class ImageClassifier { protected abstract float getNormalizedProbability(int labelIndex); /** - * Run inference using the prepared input in this.imgData. - * The result will be saved in this.labelPropArray. + * Run inference using the prepared input in {@link #imgData}. + * Afterwards, the result will be provided by getProbability(). * * This additional method is necessary, because we don't have a common base for different * primitive data types. diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java index 73252ac3e7..be17b85e0c 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java @@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -18,86 +18,88 @@ package com.example.android.tflitecamerademo; import android.app.Activity; import java.io.IOException; -import java.nio.ByteBuffer; - +/** + * This classifier works with the Inception-v3 slim model. + * It applies floating point inference rather than using a quantized model. + */ public class ImageClassifierFloatInception extends ImageClassifier { - /** - * The inception net requires additional normalization of the used input. - */ - private static final int IMAGE_MEAN = 128; - private static final float IMAGE_STD = 128.0f; - - /** - * An array to hold inference results, to be feed into Tensorflow Lite as outputs. - * This isn't part of the super class, because we need a primitive array here. - */ - private float[][] labelProbArray = null; - - /** - * Initializes an {@code ImageClassifier}. - * - * @param activity - */ - ImageClassifierFloatInception(Activity activity) throws IOException { - super(activity); - labelProbArray = new float[1][getNumLabels()]; - } - - @Override - protected String getModelPath() { - // you can download this file from - // https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip - return "inceptionv3_slim_2016.tflite"; - } - - @Override - protected String getLabelPath() { - return "labels_imagenet_slim.txt"; - } - - @Override - protected int getImageSizeX() { - return 299; - } - - @Override - protected int getImageSizeY() { - return 299; - } - - @Override - protected int getNumBytesPerChannel() { - // a 32bit float value requires 4 bytes - return 4; - } - - @Override - protected void addPixelValue(int pixelValue) { - imgData.putFloat((((pixelValue >> 16) & 0xFF) - IMAGE_MEAN) / IMAGE_STD); - imgData.putFloat((((pixelValue >> 8) & 0xFF) - IMAGE_MEAN) / IMAGE_STD); - imgData.putFloat(((pixelValue & 0xFF) - IMAGE_MEAN) / IMAGE_STD); - } - - @Override - protected float getProbability(int labelIndex) { - return labelProbArray[0][labelIndex]; - } - - @Override - protected void setProbability(int labelIndex, Number value) { - labelProbArray[0][labelIndex] = value.floatValue(); - } - - @Override - protected float getNormalizedProbability(int labelIndex) { - // TODO the following value isn't in [0,1] yet, but may be greater. Why? - return getProbability(labelIndex); - } - - @Override - protected void runInference() { - tflite.run(imgData, labelProbArray); - } + /** + * The inception net requires additional normalization of the used input. + */ + private static final int IMAGE_MEAN = 128; + private static final float IMAGE_STD = 128.0f; + + /** + * An array to hold inference results, to be feed into Tensorflow Lite as outputs. + * This isn't part of the super class, because we need a primitive array here. + */ + private float[][] labelProbArray = null; + + /** + * Initializes an {@code ImageClassifier}. + * + * @param activity + */ + ImageClassifierFloatInception(Activity activity) throws IOException { + super(activity); + labelProbArray = new float[1][getNumLabels()]; + } + + @Override + protected String getModelPath() { + // you can download this file from + // https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip + return "inceptionv3_slim_2016.tflite"; + } + + @Override + protected String getLabelPath() { + return "labels_imagenet_slim.txt"; + } + + @Override + protected int getImageSizeX() { + return 299; + } + + @Override + protected int getImageSizeY() { + return 299; + } + + @Override + protected int getNumBytesPerChannel() { + // a 32bit float value requires 4 bytes + return 4; + } + + @Override + protected void addPixelValue(int pixelValue) { + imgData.putFloat((((pixelValue >> 16) & 0xFF) - IMAGE_MEAN) / IMAGE_STD); + imgData.putFloat((((pixelValue >> 8) & 0xFF) - IMAGE_MEAN) / IMAGE_STD); + imgData.putFloat(((pixelValue & 0xFF) - IMAGE_MEAN) / IMAGE_STD); + } + + @Override + protected float getProbability(int labelIndex) { + return labelProbArray[0][labelIndex]; + } + + @Override + protected void setProbability(int labelIndex, Number value) { + labelProbArray[0][labelIndex] = value.floatValue(); + } + + @Override + protected float getNormalizedProbability(int labelIndex) { + // TODO the following value isn't in [0,1] yet, but may be greater. Why? + return getProbability(labelIndex); + } + + @Override + protected void runInference() { + tflite.run(imgData, labelProbArray); + } } diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java index 95f546b8e4..156c895146 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java @@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -18,79 +18,80 @@ package com.example.android.tflitecamerademo; import android.app.Activity; import java.io.IOException; -import java.nio.ByteBuffer; - +/** + * This classifier works with the quantized MobileNet model. + */ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { - /** - * An array to hold inference results, to be feed into Tensorflow Lite as outputs. - * This isn't part of the super class, because we need a primitive array here. - */ - private byte[][] labelProbArray = null; - - /** - * Initializes an {@code ImageClassifier}. - * - * @param activity - */ - ImageClassifierQuantizedMobileNet(Activity activity) throws IOException { - super(activity); - labelProbArray = new byte[1][getNumLabels()]; - } - - @Override - protected String getModelPath() { - // you can download this file from - // https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip - return "mobilenet_quant_v1_224.tflite"; - } - - @Override - protected String getLabelPath() { - return "labels_mobilenet_quant_v1_224.txt"; - } - - @Override - protected int getImageSizeX() { - return 224; - } - - @Override - protected int getImageSizeY() { - return 224; - } - - @Override - protected int getNumBytesPerChannel() { - // the quantized model uses a single byte only - return 1; - } - - @Override - protected void addPixelValue(int pixelValue) { - imgData.put((byte) ((pixelValue >> 16) & 0xFF)); - imgData.put((byte) ((pixelValue >> 8) & 0xFF)); - imgData.put((byte) (pixelValue & 0xFF)); - } - - @Override - protected float getProbability(int labelIndex) { - return labelProbArray[0][labelIndex]; - } - - @Override - protected void setProbability(int labelIndex, Number value) { - labelProbArray[0][labelIndex] = value.byteValue(); - } - - @Override - protected float getNormalizedProbability(int labelIndex) { - return (labelProbArray[0][labelIndex] & 0xff) / 255.0f; - } - - @Override - protected void runInference() { - tflite.run(imgData, labelProbArray); - } + /** + * An array to hold inference results, to be feed into Tensorflow Lite as outputs. + * This isn't part of the super class, because we need a primitive array here. + */ + private byte[][] labelProbArray = null; + + /** + * Initializes an {@code ImageClassifier}. + * + * @param activity + */ + ImageClassifierQuantizedMobileNet(Activity activity) throws IOException { + super(activity); + labelProbArray = new byte[1][getNumLabels()]; + } + + @Override + protected String getModelPath() { + // you can download this file from + // https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip + return "mobilenet_quant_v1_224.tflite"; + } + + @Override + protected String getLabelPath() { + return "labels_mobilenet_quant_v1_224.txt"; + } + + @Override + protected int getImageSizeX() { + return 224; + } + + @Override + protected int getImageSizeY() { + return 224; + } + + @Override + protected int getNumBytesPerChannel() { + // the quantized model uses a single byte only + return 1; + } + + @Override + protected void addPixelValue(int pixelValue) { + imgData.put((byte) ((pixelValue >> 16) & 0xFF)); + imgData.put((byte) ((pixelValue >> 8) & 0xFF)); + imgData.put((byte) (pixelValue & 0xFF)); + } + + @Override + protected float getProbability(int labelIndex) { + return labelProbArray[0][labelIndex]; + } + + @Override + protected void setProbability(int labelIndex, Number value) { + labelProbArray[0][labelIndex] = value.byteValue(); + } + + @Override + protected float getNormalizedProbability(int labelIndex) { + return (labelProbArray[0][labelIndex] & 0xff) / 255.0f; + } + + @Override + protected void runInference() { + tflite.run(imgData, labelProbArray); + } } -- GitLab From 36e84f9d0f4c2dc28675bca8c525d47756833e3e Mon Sep 17 00:00:00 2001 From: Tim H Date: Wed, 7 Feb 2018 15:12:45 +0100 Subject: [PATCH 055/629] change the README to reflect the latest changes --- tensorflow/contrib/lite/README.md | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index 55a524b207..e9d0d9573b 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -6,7 +6,7 @@ TensorFlow Lite uses many techniques for achieving low latency like optimizing t ![image](g3doc/TFLite-Architecture.jpg) # Getting Started with an Android Demo App -This section contains an example application using TensorFlow Lite for Android devices. The demo is a sample camera app that classifies images continuously using a quantized Mobilenet model. A device running Android 5.0 ( API 21) or higher is required to run the demo. +This section contains an example application using TensorFlow Lite for Android devices. The demo is a sample camera app that classifies images continuously using either a quantized Mobilenet model or a floating point Inception-v3 model. A device running Android 5.0 ( API 21) or higher is required to run the demo. There are 3 ways to get the demo app to your device - Download the prebuilt binary or @@ -29,9 +29,16 @@ The simplest way to compile the demo app, and try out changes to the project cod - Make sure the Android SDK version is greater than 26 and NDK version is greater than 14 (in the Android Studio Settings). - Import the `tensorflow/contrib/lite/java/demo` directory as a new Android Studio project. - Click through installing all the Gradle extensions it requests. - - Download the quantized Mobilenet TensorFlow Lite model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip) - - unzip and copy mobilenet_quant_v1_224.tflite to the assets directory: - `tensorflow/contrib/lite/java/demo/app/src/main/assets/` + - Either + - Download the quantized Mobilenet TensorFlow Lite model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip) + - unzip and copy mobilenet_quant_v1_224.tflite to the assets directory: + `tensorflow/contrib/lite/java/demo/app/src/main/assets/` + - Or download the floating point Inception-v3 model from [here](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip) + - unzip and copy inceptionv3_non_slim_2015.tflite to the assets directory + - change the chosen classifier in [Camera2BasicFragment.java](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java) from + `classifier = new ImageClassifierQuantizedMobileNet(getActivity());` + to + `classifier = new ImageClassifierFloatInception(getActivity());` - Build and run the demo app ## Building TensorFlow Lite and the demo app from source @@ -84,7 +91,7 @@ Currently, we only support building the Android demo app within a Python 2 environment (due to a Bazel bug). ### More about the demo -The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used. The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch 224 * 224 is the width and height of the image 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. The Mobilenet model has 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The Mobilenet quantized model is bundled within the assets directory of the app. +The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used (229 * 229 for Inception-v3). The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch. 224 * 224 (299 * 299) is the width and height of the image. 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. Both models have 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The model file must be downloaded and bundled within the assets directory of the app. # iOS Demo App -- GitLab From efcb721c4375f26b7585e41f187dc74aa9f5b891 Mon Sep 17 00:00:00 2001 From: Julian Wolff Date: Mon, 15 Jan 2018 11:28:27 +0100 Subject: [PATCH 056/629] allow 'None' as batch size for TimeFreqLSTMCell --- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 6af9db3f15..5e81b067f6 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -424,8 +424,8 @@ class TimeFreqLSTMCell(rnn_cell_impl.RNNCell): "W_O_diag", shape=[self._num_units], dtype=dtype) # initialize the first freq state to be zero - m_prev_freq = array_ops.zeros([int(inputs.get_shape()[0]), self._num_units], - dtype) + m_prev_freq = array_ops.zeros([inputs.get_shape()[0], + self._num_units], dtype) for fq in range(len(freq_inputs)): c_prev = array_ops.slice(state, [0, 2 * fq * self._num_units], [-1, self._num_units]) -- GitLab From 441c3b32da51b8d24b8ece3bdd104f5ec13dc40d Mon Sep 17 00:00:00 2001 From: Julian Wolff Date: Mon, 15 Jan 2018 11:36:33 +0100 Subject: [PATCH 057/629] use array_ops.shape --- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 5e81b067f6..e650c1931e 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -424,7 +424,7 @@ class TimeFreqLSTMCell(rnn_cell_impl.RNNCell): "W_O_diag", shape=[self._num_units], dtype=dtype) # initialize the first freq state to be zero - m_prev_freq = array_ops.zeros([inputs.get_shape()[0], + m_prev_freq = array_ops.zeros([array_ops.shape(inputs)[0], self._num_units], dtype) for fq in range(len(freq_inputs)): c_prev = array_ops.slice(state, [0, 2 * fq * self._num_units], -- GitLab From 83f06ec185ee87fc57220f2f63245d81aa3c9311 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 7 Feb 2018 20:15:10 -0800 Subject: [PATCH 058/629] Adding the Visual Studio specification for cmake for the 1.6 branch. (#16852) --- tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat index b87e4a9bec..e545146188 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat @@ -37,7 +37,7 @@ SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" :: Run cmake to create Visual Studio Project files. -%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX +%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX -G "Visual Studio 14 2015" :: Run msbuild in the resulting VS project files to build a pip package. %MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj -- GitLab From 59e87f52e6c7059920b44375dd697be865624c1d Mon Sep 17 00:00:00 2001 From: Julian Wolff Date: Thu, 8 Feb 2018 10:27:54 +0100 Subject: [PATCH 059/629] preserve static shape info --- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index e650c1931e..21f6c91fc4 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -424,7 +424,7 @@ class TimeFreqLSTMCell(rnn_cell_impl.RNNCell): "W_O_diag", shape=[self._num_units], dtype=dtype) # initialize the first freq state to be zero - m_prev_freq = array_ops.zeros([array_ops.shape(inputs)[0], + m_prev_freq = array_ops.zeros([inputs.shape[0].value or array_ops.shape(inputs)[0], self._num_units], dtype) for fq in range(len(freq_inputs)): c_prev = array_ops.slice(state, [0, 2 * fq * self._num_units], -- GitLab From f70db141135dd463c294e76284cab7331c55933e Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 8 Feb 2018 11:48:21 -0800 Subject: [PATCH 060/629] Update rnn_cell.py --- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 21f6c91fc4..0e73a7b901 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -424,8 +424,9 @@ class TimeFreqLSTMCell(rnn_cell_impl.RNNCell): "W_O_diag", shape=[self._num_units], dtype=dtype) # initialize the first freq state to be zero - m_prev_freq = array_ops.zeros([inputs.shape[0].value or array_ops.shape(inputs)[0], - self._num_units], dtype) + m_prev_freq = array_ops.zeros( + [inputs.shape[0].value or inputs.get_shape()[0], self._num_units], + dtype) for fq in range(len(freq_inputs)): c_prev = array_ops.slice(state, [0, 2 * fq * self._num_units], [-1, self._num_units]) -- GitLab From 4ca7415c79f600e261b35e12a58a9659d5e51be8 Mon Sep 17 00:00:00 2001 From: mholzel Date: Fri, 9 Feb 2018 15:19:15 +0100 Subject: [PATCH 061/629] Added detailed discussion of non-strict semantics --- tensorflow/python/ops/control_flow_ops.py | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 9ae9a71e4b..a03834600c 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -3120,6 +3120,43 @@ def while_loop(cond, c, b, loop_vars=[i0, m0], shape_invariants=[i0.get_shape(), tf.TensorShape([None, 2])]) ``` + + Example which demonstrates non-strict semantics: In the following + example, the final value of the counter `i` does not depend on `x`. So + the `while_loop` can increment the counter parallel to updates of `x`. + However, because the loop counter at one loop iteration depends + on the value at the previous iteration, the loop counter itself cannot + be incremented in parallel. Hence if we just want the final value of the + counter (which we print on the line `print(sess.run(i))`), then + `x` will never be incremented, but the counter will be updated on a + single thread. Conversely, if we want the value of the output (which we + print on the line `print(sess.run(out).shape)`), then the counter may be + incremented on its own thread, while `x` can be incremented in + parallel on a separate thread. In the extreme case, it is conceivable + that the thread incrementing the counter runs until completion before + `x` is incremented even a single time. The only thing that can never + happen is that the thread updating `x` can never get ahead of the + counter thread because the thread incrementing `x` depends on the value + of the counter. + ```python + import tensorflow as tf + + n = 10000 + x = tf.constant(list(range(n))) + c = lambda i, x: i < n + b = lambda i, x: (tf.Print(i + 1, [i]), tf.Print(x + 1, [i], "x:")) + i, out = tf.while_loop(c, b, (0, x)) + with tf.Session() as sess: + print(sess.run(i)) # prints [0] ... [9999] + + # The following line may increment the counter and x in parallel. + # The counter thread may get ahead of the other thread, but not the + # other way around. So you may see things like + # [9996] x:[9987] + # meaning that the counter thread is on iteration 9996, + # while the other thread is on iteration 9987 + print(sess.run(out).shape) + ``` """ with ops.name_scope(name, "while", loop_vars): -- GitLab From 963941e7d639b3211a5792b1086128db554a740a Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Fri, 9 Feb 2018 13:15:31 -0800 Subject: [PATCH 062/629] Update tensorboard dependency to 1.6.0+ and new name (#16815) * Update tensorboard dependency to 1.6.0+ and new name * Mention tensorboard package name change in RELEASE.md --- RELEASE.md | 4 ++++ tensorflow/tools/pip_package/setup.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 0fad3b5d41..de4a34bb04 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -59,6 +59,10 @@ newcomers. TensorFlow will print a warning if you use XLA:GPU with a known-bad version of CUDA; see e00ba24c4038e7644da417ddc639169b6ea59122. +* The `tensorboard` command or module may appear to be missing after certain + upgrade flows. This is due to pip package conflicts as a result of changing + the TensorBoard package name. See the [TensorBoard 1.6.0 release notes]( + https://github.com/tensorflow/tensorboard/releases/tag/1.6.0) for a fix. ## Thanks to our Contributors diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index fe2c22f2f5..0d4fa465ad 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -39,7 +39,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorflow-tensorboard >= 1.5.0, < 1.6.0', + 'tensorboard >= 1.6.0, < 1.7.0', 'termcolor >= 1.1.0', ] -- GitLab From f06b0f8057dfdffbb0179eac12c53ea2cc5042b8 Mon Sep 17 00:00:00 2001 From: fo40225 Date: Sat, 10 Feb 2018 12:34:17 +0800 Subject: [PATCH 063/629] fix expression must have a constant value tensorflow\tensorflow/core/framework/tensor_shape.h(468): error : expression must have a constant value static_assert(NDIMS <= TensorShape::MaxDimensions(), "Too many dimensions"); ^ tensorflow\tensorflow/core/framework/tensor_shape.h(468): note: constexpr function function "tensorflow::TensorShapeBase::MaxDimensions [with Shape=tensorflow::TensorShape]" (declared at line 195) is not defined static_assert(NDIMS <= TensorShape::MaxDimensions(), "Too many dimensions"); ^ --- tensorflow/core/framework/tensor_shape.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/core/framework/tensor_shape.h b/tensorflow/core/framework/tensor_shape.h index adb41b81c6..fe2ba375aa 100644 --- a/tensorflow/core/framework/tensor_shape.h +++ b/tensorflow/core/framework/tensor_shape.h @@ -191,9 +191,6 @@ class TensorShapeBase : public TensorShapeRep { /// Appends all the dimensions from `shape`. void AppendShape(const TensorShapeBase& shape); - // Maximum number of dimensions in a tensor. - static constexpr int MaxDimensions() { return 254; } - /// \brief Insert a dimension somewhere in the `TensorShape`. /// REQUIRES: `0 <= d <= dims()` /// REQUIRES: `size >= 0` -- GitLab From 61df29fa97cf82f3d1ef129a70bb5fa3ed99fe3a Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Sat, 10 Feb 2018 09:50:40 -0800 Subject: [PATCH 064/629] Update version string to 1.6.0-rc1 --- .../contrib/tpu/profiler/pip_package/setup.py | 2 +- tensorflow/core/public/version.h | 2 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 22 +++++++++---------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 14 ++++++------ tensorflow/tools/pip_package/setup.py | 2 +- 9 files changed, 39 insertions(+), 39 deletions(-) diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py index cb61984799..52984cd6fd 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py @@ -20,7 +20,7 @@ from __future__ import print_function from setuptools import setup -_VERSION = '1.6.0-rc0' +_VERSION = '1.6.0-rc1' CONSOLE_SCRIPTS = [ 'capture_tpu_profile=cloud_tpu_profiler.main:run_main', diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 50bfa91267..7405e01e14 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc0" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index a783205b4a..f3620cf687 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 5249e04615..4bf4bacaec 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 0c6c773e62..1905f9729e 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.6.0-rc0 + 1.6.0-rc1 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.6.0-rc0 + 1.6.0-rc1 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.6.0-rc0 + 1.6.0-rc1 org.tensorflow libtensorflow_jni_gpu - 1.6.0-rc0 + 1.6.0-rc1 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc1.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.6.0-rc0.jar HelloTF.java
+
javac -cp libtensorflow-1.6.0-rc1.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.6.0-rc0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.6.0-rc1.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.6.0-rc0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.6.0-rc1.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 105b225177..62bd45650a 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -293,7 +293,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl @@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
 
@@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index a6ea548cfb..e3832a7a2a 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -115,7 +115,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -238,7 +238,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -347,7 +347,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl @@ -520,7 +520,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
 
@@ -528,5 +528,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 36dffd85dc..051da692d3 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.6.0rc0 on Linux: +for TensorFlow 1.6.0rc1 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc1-py2-none-any.whl
 
## Validate your installation @@ -460,8 +460,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0rc0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.5.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.4.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
- - + + @@ -479,7 +479,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0rc0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.6.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
tensorflow_gpu-1.5.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.079
tensorflow-1.4.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.4N/AN/A
- + @@ -493,8 +493,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.6.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
tensorflow-1.3.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
- - + + diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 0d4fa465ad..a835275dae 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0-rc0' +_VERSION = '1.6.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From 9832df4e7acf093e2fccbb84c5362ee201ea4321 Mon Sep 17 00:00:00 2001 From: fo40225 Date: Sun, 11 Feb 2018 20:48:10 +0800 Subject: [PATCH 065/629] fix type name is not allowed --- .../image/kernels/bipartite_match_op.cc | 2 +- .../seq2seq/kernels/beam_search_ops.cc | 8 ++++---- tensorflow/core/kernels/colorspace_op.cc | 2 +- .../core/kernels/non_max_suppression_op.cc | 5 ++--- .../kernels/quantized_resize_bilinear_op.cc | 4 ++-- tensorflow/core/kernels/random_crop_op.cc | 4 ++-- tensorflow/core/kernels/resize_area_op.cc | 5 ++--- tensorflow/core/kernels/resize_bicubic_op.cc | 10 ++++------ tensorflow/core/kernels/resize_bilinear_op.cc | 10 ++++------ .../kernels/resize_nearest_neighbor_op.cc | 8 ++++---- .../sample_distorted_bounding_box_op.cc | 6 +++--- tensorflow/core/kernels/substr_op.cc | 20 +++++++++---------- 12 files changed, 39 insertions(+), 45 deletions(-) diff --git a/tensorflow/contrib/image/kernels/bipartite_match_op.cc b/tensorflow/contrib/image/kernels/bipartite_match_op.cc index 7d207c388b..726adb0777 100644 --- a/tensorflow/contrib/image/kernels/bipartite_match_op.cc +++ b/tensorflow/contrib/image/kernels/bipartite_match_op.cc @@ -85,7 +85,7 @@ class BipartiteMatchOp : public OpKernel { context->allocate_output(1, TensorShape({num_input_columns}), &column_to_row_match_indices)); - typename TTypes::ConstTensor distance_mat = + TTypes::ConstTensor distance_mat = input_distance_mat.shaped( {num_input_rows, num_input_columns}); diff --git a/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc b/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc index 64973ccccd..dfa12e873a 100644 --- a/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc +++ b/tensorflow/contrib/seq2seq/kernels/beam_search_ops.cc @@ -80,12 +80,12 @@ class GatherTreeOp : public OpKernel { max_sequence_lengths.shape().DebugString())); Tensor* beams; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, step_ids_shape, &beams)); - typename TTypes::ConstTensor step_ids_t = step_ids.tensor(); - typename TTypes::ConstTensor parent_ids_t = parent_ids.tensor(); + typename TTypes::ConstTensor step_ids_t(step_ids.tensor()); + typename TTypes::ConstTensor parent_ids_t(parent_ids.tensor()); typename TTypes::ConstVec max_seq_lens_t = max_sequence_lengths.vec(); - typename TTypes::ConstScalar end_token_t = end_token.scalar(); - typename TTypes::Tensor beams_t = beams->tensor(); + typename TTypes::ConstScalar end_token_t(end_token.scalar()); + typename TTypes::Tensor beams_t(beams->tensor()); const T end_token_value = end_token_t(); functor::GatherTree()(ctx, device, step_ids_t, parent_ids_t, max_seq_lens_t, end_token_value, beams_t); diff --git a/tensorflow/core/kernels/colorspace_op.cc b/tensorflow/core/kernels/colorspace_op.cc index 9cc2e67bbe..f4402a245d 100644 --- a/tensorflow/core/kernels/colorspace_op.cc +++ b/tensorflow/core/kernels/colorspace_op.cc @@ -71,7 +71,7 @@ class RGBToHSVOp : public OpKernel { TensorShape({input_data.dimension(0)}), &trange)); - typename TTypes::Tensor range = trange.tensor(); + typename TTypes::Tensor range(trange.tensor()); functor::RGBToHSV()(context->eigen_device(), input_data, range, output_data); diff --git a/tensorflow/core/kernels/non_max_suppression_op.cc b/tensorflow/core/kernels/non_max_suppression_op.cc index 5d28b87e6b..903b898d0a 100644 --- a/tensorflow/core/kernels/non_max_suppression_op.cc +++ b/tensorflow/core/kernels/non_max_suppression_op.cc @@ -105,7 +105,7 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& boxes, } const int output_size = std::min(max_output_size.scalar()(), num_boxes); - typename TTypes::ConstTensor boxes_data = boxes.tensor(); + TTypes::ConstTensor boxes_data = boxes.tensor(); std::vector scores_data(num_boxes); std::copy_n(scores.flat().data(), num_boxes, scores_data.begin()); @@ -138,8 +138,7 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& boxes, Tensor* output = nullptr; TensorShape output_shape({static_cast(selected.size())}); OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); - typename TTypes::Tensor selected_indices_data = - output->tensor(); + TTypes::Tensor selected_indices_data = output->tensor(); std::copy_n(selected.begin(), selected.size(), selected_indices_data.data()); } diff --git a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc index fb2faede2f..9a1dcd0d49 100644 --- a/tensorflow/core/kernels/quantized_resize_bilinear_op.cc +++ b/tensorflow/core/kernels/quantized_resize_bilinear_op.cc @@ -697,8 +697,8 @@ class QuantizedResizeBilinearOp : public OpKernel { // Return if the output is empty. if (st.output->NumElements() == 0) return; - typename TTypes::ConstTensor image_data = input.tensor(); - typename TTypes::Tensor output_data = st.output->tensor(); + typename TTypes::ConstTensor image_data(input.tensor()); + typename TTypes::Tensor output_data(st.output->tensor()); ResizeBilinear(image_data, st.height_scale, st.width_scale, in_min, in_max, &output_data); diff --git a/tensorflow/core/kernels/random_crop_op.cc b/tensorflow/core/kernels/random_crop_op.cc index 554909760a..b89bda4769 100644 --- a/tensorflow/core/kernels/random_crop_op.cc +++ b/tensorflow/core/kernels/random_crop_op.cc @@ -92,8 +92,8 @@ class RandomCropOp : public OpKernel { // TODO(shlens): Do this more efficiently with memcpy once padding is // available for smaller images. - typename TTypes::ConstTensor input_data = input.tensor(); - typename TTypes::Tensor output_data = output->tensor(); + typename TTypes::ConstTensor input_data(input.tensor()); + typename TTypes::Tensor output_data(output->tensor()); for (int y = 0; y < target_height; ++y) { for (int x = 0; x < target_width; ++x) { diff --git a/tensorflow/core/kernels/resize_area_op.cc b/tensorflow/core/kernels/resize_area_op.cc index ada50dfb70..98b8a0df28 100644 --- a/tensorflow/core/kernels/resize_area_op.cc +++ b/tensorflow/core/kernels/resize_area_op.cc @@ -149,7 +149,7 @@ class ResizeAreaOp : public OpKernel { if (!context->status().ok()) return; - typename TTypes::ConstTensor input_data = input.tensor(); + typename TTypes::ConstTensor input_data(input.tensor()); // Precompute values used when iterating over x coordinates within a row. // Note that it may be useful to cache x_interps for a given @@ -190,8 +190,7 @@ class ResizeAreaOp : public OpKernel { void ComputeLoop(const ImageResizerState& st, const std::vector& x_interps, typename TTypes::ConstTensor input_data) { - typename TTypes::Tensor output_data = - st.output->tensor(); + TTypes::Tensor output_data = st.output->tensor(); // When using this algorithm for downsizing, the target pixel value is the // weighted average of all the source pixels. The weight is determined by diff --git a/tensorflow/core/kernels/resize_bicubic_op.cc b/tensorflow/core/kernels/resize_bicubic_op.cc index 86e61bbcef..65014b6c44 100644 --- a/tensorflow/core/kernels/resize_bicubic_op.cc +++ b/tensorflow/core/kernels/resize_bicubic_op.cc @@ -480,9 +480,8 @@ class ResizeBicubicOp : public OpKernel { if (!context->status().ok()) return; - typename TTypes::ConstTensor input_data = input.tensor(); - typename TTypes::Tensor output_data = - st.output->tensor(); + typename TTypes::ConstTensor input_data(input.tensor()); + TTypes::Tensor output_data = st.output->tensor(); interpolate_with_caching(input_data, st, output_data); } @@ -510,9 +509,8 @@ class ResizeBicubicOpGrad : public OpKernel { if (!context->status().ok()) return; - typename TTypes::ConstTensor input_grad = - input.tensor(); - typename TTypes::Tensor output_grad = st.output->tensor(); + TTypes::ConstTensor input_grad = input.tensor(); + typename TTypes::Tensor output_grad(st.output->tensor()); ResizeBicubicGrad(input_grad, st, output_grad); } diff --git a/tensorflow/core/kernels/resize_bilinear_op.cc b/tensorflow/core/kernels/resize_bilinear_op.cc index d9cb993a4b..dde59e8e74 100644 --- a/tensorflow/core/kernels/resize_bilinear_op.cc +++ b/tensorflow/core/kernels/resize_bilinear_op.cc @@ -51,9 +51,8 @@ class ResizeBilinearOp : public OpKernel { // Return if the output is empty. if (st.output->NumElements() == 0) return; - typename TTypes::ConstTensor image_data = input.tensor(); - typename TTypes::Tensor output_data = - st.output->tensor(); + typename TTypes::ConstTensor image_data(input.tensor()); + TTypes::Tensor output_data = st.output->tensor(); functor::ResizeBilinear()(context->eigen_device(), image_data, st.height_scale, @@ -258,9 +257,8 @@ class ResizeBilinearOpGrad : public OpKernel { if (!context->status().ok()) return; - typename TTypes::ConstTensor input_grad = - input.tensor(); - typename TTypes::Tensor output_grad = st.output->tensor(); + TTypes::ConstTensor input_grad = input.tensor(); + typename TTypes::Tensor output_grad(st.output->tensor()); functor::ResizeBilinearGrad()(context->eigen_device(), input_grad, st.height_scale, diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc index bfd29b7ec8..8ec526c2b2 100644 --- a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc +++ b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc @@ -56,8 +56,8 @@ class ResizeNearestNeighborOp : public OpKernel { // Return if the output is empty. if (st.output->NumElements() == 0) return; - typename TTypes::ConstTensor input_data = input.tensor(); - typename TTypes::Tensor output_data = st.output->tensor(); + typename TTypes::ConstTensor input_data(input.tensor()); + typename TTypes::Tensor output_data(st.output->tensor()); bool status; if (align_corners_) { @@ -162,8 +162,8 @@ class ResizeNearestNeighborOpGrad : public OpKernel { // Return if the output is empty. if (output->NumElements() == 0) return; - typename TTypes::ConstTensor input_data = input.tensor(); - typename TTypes::Tensor output_data = output->tensor(); + typename TTypes::ConstTensor input_data(input.tensor()); + typename TTypes::Tensor output_data(output->tensor()); const float height_scale = CalculateResizeScale(out_height, in_height, align_corners_); diff --git a/tensorflow/core/kernels/sample_distorted_bounding_box_op.cc b/tensorflow/core/kernels/sample_distorted_bounding_box_op.cc index 44a817a5c7..c0fde8042e 100644 --- a/tensorflow/core/kernels/sample_distorted_bounding_box_op.cc +++ b/tensorflow/core/kernels/sample_distorted_bounding_box_op.cc @@ -387,9 +387,9 @@ class SampleDistortedBoundingBoxV2Op : public OpKernel { OP_REQUIRES_OK( context, context->allocate_output(2, TensorShape({1, 1, 4}), &bboxes)); - typename TTypes::Tensor begin_data = begin->tensor(); - typename TTypes::Tensor size_data = size->tensor(); - typename TTypes::Tensor bboxes_data = bboxes->tensor(); + typename TTypes::Tensor begin_data(begin->tensor()); + typename TTypes::Tensor size_data(size->tensor()); + TTypes::Tensor bboxes_data = bboxes->tensor(); begin_data(0) = T(offset_height); size_data(0) = T(target_height); diff --git a/tensorflow/core/kernels/substr_op.cc b/tensorflow/core/kernels/substr_op.cc index e29f67297f..22e45918a0 100644 --- a/tensorflow/core/kernels/substr_op.cc +++ b/tensorflow/core/kernels/substr_op.cc @@ -115,7 +115,7 @@ class SubstrOp : public OpKernel { Tensor input_buffer; OP_REQUIRES_OK(context, context->allocate_temp( DT_STRING, output_shape, &input_buffer)); - typename TTypes::Tensor input_bcast = + TTypes::Tensor input_bcast = input_buffer.shaped(bcast.result_shape()); input_bcast = input.broadcast(BCast::ToIndexArray<1>(bcast.x_bcast())); @@ -125,8 +125,8 @@ class SubstrOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), output_shape, &pos_buffer)); - typename TTypes::Tensor pos_bcast = - pos_buffer.shaped(bcast.result_shape()); + typename TTypes::Tensor pos_bcast( + pos_buffer.shaped(bcast.result_shape())); pos_bcast = pos_shaped.broadcast(BCast::ToIndexArray<1>(bcast.y_bcast())); @@ -135,8 +135,8 @@ class SubstrOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), output_shape, &len_buffer)); - typename TTypes::Tensor len_bcast = - len_buffer.shaped(bcast.result_shape()); + typename TTypes::Tensor len_bcast( + len_buffer.shaped(bcast.result_shape())); len_bcast = len_shaped.broadcast(BCast::ToIndexArray<1>(bcast.y_bcast())); @@ -164,7 +164,7 @@ class SubstrOp : public OpKernel { Tensor input_buffer; OP_REQUIRES_OK(context, context->allocate_temp( DT_STRING, output_shape, &input_buffer)); - typename TTypes::Tensor input_bcast = + TTypes::Tensor input_bcast = input_buffer.shaped(bcast.result_shape()); input_bcast = input.broadcast(BCast::ToIndexArray<2>(bcast.x_bcast())); @@ -174,8 +174,8 @@ class SubstrOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), output_shape, &pos_buffer)); - typename TTypes::Tensor pos_bcast = - pos_buffer.shaped(bcast.result_shape()); + typename TTypes::Tensor pos_bcast( + pos_buffer.shaped(bcast.result_shape())); pos_bcast = pos_shaped.broadcast(BCast::ToIndexArray<2>(bcast.y_bcast())); @@ -184,8 +184,8 @@ class SubstrOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_temp(DataTypeToEnum::v(), output_shape, &len_buffer)); - typename TTypes::Tensor len_bcast = - len_buffer.shaped(bcast.result_shape()); + typename TTypes::Tensor len_bcast( + len_buffer.shaped(bcast.result_shape())); len_bcast = len_shaped.broadcast(BCast::ToIndexArray<2>(bcast.y_bcast())); -- GitLab From e245ea9163fef4c2996d9ba8e14d703f1d153ed6 Mon Sep 17 00:00:00 2001 From: Neal Wu Date: Mon, 12 Feb 2018 10:47:26 -0800 Subject: [PATCH 066/629] Change the column name in tutorials/wide.md from 'income' to 'income_bracket' to match the code PiperOrigin-RevId: 185400490 --- tensorflow/docs_src/tutorials/wide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/tutorials/wide.md b/tensorflow/docs_src/tutorials/wide.md index dba6f54c52..005dc020f9 100644 --- a/tensorflow/docs_src/tutorials/wide.md +++ b/tensorflow/docs_src/tutorials/wide.md @@ -82,7 +82,7 @@ Here's a list of columns available in the Census Income dataset: | hours_per_week | Continuous | Hours worked per week. | | native_country | Categorical | Country of origin of the | : : : individual. : -| income | Categorical | ">50K" or "<=50K", meaning | +| income_bracket | Categorical | ">50K" or "<=50K", meaning | : : : whether the person makes more : : : : than $50,000 annually. : -- GitLab From 075931641e9147f0faf16e0ce2b76525620e1be0 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 12 Feb 2018 11:12:04 -0800 Subject: [PATCH 067/629] Make variable_ops_test optonly variable_ops_test sometimes times out in fastbuild mode. So mark it as optonly. Running this test with `bazel test -c opt` passes all 1000 of 1000 reruns. Running it with just `bazel test` fails 5 out of 300 reruns. PiperOrigin-RevId: 185404726 --- tensorflow/compiler/tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 25e329b6aa..db65ee13d1 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -639,6 +639,7 @@ tf_xla_py_test( name = "variable_ops_test", size = "small", srcs = ["variable_ops_test.py"], + tags = ["optonly"], deps = [ ":xla_test", "//tensorflow/python:array_ops", -- GitLab From fabf6ddede109bbf18115718224449c314bcf92a Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Mon, 12 Feb 2018 11:26:22 -0800 Subject: [PATCH 068/629] [XLA] An HLO pass that folds BF16 F32 conversions: if an HLO already supports BF16 input/output, conversions before/after it will be removed and the HLO's input/output types will be converted to BF16. Also updates HloVerifier to allow mixed precision if requested. If an HLO has both both F32 and BF16 inputs, ShapeInference will use F32 as the output type. PiperOrigin-RevId: 185407143 --- tensorflow/compiler/xla/service/BUILD | 75 ++++ .../service/bfloat16_conversion_folding.cc | 184 +++++++++ .../xla/service/bfloat16_conversion_folding.h | 52 +++ .../bfloat16_conversion_folding_test.cc | 209 +++++++++++ .../xla/service/bfloat16_normalization.cc | 351 ++++++++++++++++++ .../xla/service/bfloat16_normalization.h | 92 +++++ .../service/bfloat16_normalization_test.cc | 248 +++++++++++++ .../compiler/xla/service/bfloat16_support.cc | 111 ++++++ .../compiler/xla/service/bfloat16_support.h | 60 +++ .../compiler/xla/service/hlo_instruction.cc | 7 +- .../compiler/xla/service/hlo_verifier.cc | 129 ++++++- .../compiler/xla/service/hlo_verifier.h | 33 +- .../compiler/xla/service/shape_inference.cc | 132 ++++--- tensorflow/compiler/xla/shape_util.cc | 13 + tensorflow/compiler/xla/shape_util.h | 30 ++ tensorflow/compiler/xla/shape_util_test.cc | 28 ++ 16 files changed, 1689 insertions(+), 65 deletions(-) create mode 100644 tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc create mode 100644 tensorflow/compiler/xla/service/bfloat16_conversion_folding.h create mode 100644 tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc create mode 100644 tensorflow/compiler/xla/service/bfloat16_normalization.cc create mode 100644 tensorflow/compiler/xla/service/bfloat16_normalization.h create mode 100644 tensorflow/compiler/xla/service/bfloat16_normalization_test.cc create mode 100644 tensorflow/compiler/xla/service/bfloat16_support.cc create mode 100644 tensorflow/compiler/xla/service/bfloat16_support.h diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 93cc5ab1a9..9f5f2f96b7 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -43,6 +43,81 @@ filegroup( ]), ) +cc_library( + name = "bfloat16_support", + srcs = ["bfloat16_support.cc"], + hdrs = ["bfloat16_support.h"], + deps = [ + ":hlo", + ], +) + +cc_library( + name = "bfloat16_conversion_folding", + srcs = ["bfloat16_conversion_folding.cc"], + hdrs = ["bfloat16_conversion_folding.h"], + deps = [ + ":bfloat16_support", + ":hlo", + ":hlo_pass", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "bfloat16_conversion_folding_test", + srcs = ["bfloat16_conversion_folding_test.cc"], + deps = [ + ":bfloat16_conversion_folding", + ":bfloat16_support", + ":hlo", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "bfloat16_normalization", + srcs = ["bfloat16_normalization.cc"], + hdrs = ["bfloat16_normalization.h"], + deps = [ + ":bfloat16_support", + ":hlo", + ":hlo_pass", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "bfloat16_normalization_test", + srcs = ["bfloat16_normalization_test.cc"], + deps = [ + ":bfloat16_normalization", + ":bfloat16_support", + ":hlo", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", + ], +) + cc_library( name = "shape_inference", srcs = ["shape_inference.cc"], diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc new file mode 100644 index 0000000000..cde990e176 --- /dev/null +++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc @@ -0,0 +1,184 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/bfloat16_conversion_folding.h" + +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +class BFloat16ConversionFoldingVisitor : public DfsHloVisitorWithDefault { + public: + explicit BFloat16ConversionFoldingVisitor( + HloComputation* computation, const BFloat16Support* bfloat16_support) + : computation_(computation), bfloat16_support_(bfloat16_support) {} + + Status DefaultAction(HloInstruction* hlo) override; + + static bool Run(HloComputation* computation, + const BFloat16Support* bfloat16_support) { + BFloat16ConversionFoldingVisitor visitor(computation, bfloat16_support); + TF_CHECK_OK(computation->Accept(&visitor)); + return visitor.changed_; + } + + private: + // Checks if the HLO has a BF16 -> F32 conversion as input, or a F32 -> BF16 + // conversion as output, and folds them to the HLO itself if feasible. + Status TryFoldBF16Conversions(HloInstruction* hlo); + + // Folds the F32 -> BF16 conversions from the HLO's output. + // + // Precondition: all of the HLO's users are F32 -> BF16 conversions. + Status FoldOutputConversions(HloInstruction* hlo); + + // Folds the BF16 -> F32 conversion operand to the HLO. + // + // Precondition: the operand is a F32 -> BF16 conversion. + Status FoldOperandConversion(HloInstruction* hlo, int64 operand_index); + + HloComputation* computation_; + const BFloat16Support* bfloat16_support_; + bool changed_ = false; +}; + +Status BFloat16ConversionFoldingVisitor::FoldOutputConversions( + HloInstruction* hlo) { + std::vector materialized_users = hlo->users(); + hlo->mutable_shape()->set_element_type(BF16); + for (auto user : materialized_users) { + CHECK_EQ(user->opcode(), HloOpcode::kConvert); + TF_RETURN_IF_ERROR(user->ReplaceAllUsesWith(hlo)); + changed_ = true; + } + return Status::OK(); +} + +Status BFloat16ConversionFoldingVisitor::FoldOperandConversion( + HloInstruction* hlo, int64 operand_index) { + // The operand is a convert from BF16 to F32. + auto operand = hlo->mutable_operand(operand_index); + CHECK_EQ(operand->opcode(), HloOpcode::kConvert); + TF_RETURN_IF_ERROR( + hlo->ReplaceOperandWith(operand_index, operand->mutable_operand(0))); + changed_ = true; + return Status::OK(); +} + +Status BFloat16ConversionFoldingVisitor::TryFoldBF16Conversions( + HloInstruction* hlo) { + std::vector bf16_to_f32_operands; + bool has_other_f32_operands = false; + for (int64 i = 0; i < hlo->operands().size(); ++i) { + auto operand = hlo->operand(i); + if (operand->shape().element_type() == F32) { + if (operand->opcode() == HloOpcode::kConvert && + operand->operand(0)->shape().element_type() == BF16 && + bfloat16_support_->SupportsBF16Operand(*hlo, i)) { + // Operand is a convert from BF16 to F32 and we support BF16 input + // directly in the current HLO at the operand index. + bf16_to_f32_operands.push_back(i); + } else { + has_other_f32_operands = true; + } + continue; + } + } + + bool fold_output_conversion = hlo->user_count() > 0 && + hlo->shape().element_type() == F32 && + bfloat16_support_->SupportsBF16Output(*hlo) && + hlo != computation_->root_instruction(); + if (fold_output_conversion) { + for (auto user : hlo->users()) { + if (user->opcode() == HloOpcode::kConvert && + user->shape().element_type() == BF16) { + continue; + } + // We should not change the output type if any user is not a conversion + // from F32 to BF16. + fold_output_conversion = false; + break; + } + } + + if (!bfloat16_support_->SupportsMixedPrecisions(*hlo)) { + if (has_other_f32_operands || + (!fold_output_conversion && hlo->shape().element_type() == F32)) { + // Some of the operands/output will remain F32, but we cannot use mixed + // precisions, so we cannot do anything here. + return Status::OK(); + } + } + + if (fold_output_conversion) { + TF_RETURN_IF_ERROR(FoldOutputConversions(hlo)); + } + + for (int64 i : bf16_to_f32_operands) { + TF_RETURN_IF_ERROR(FoldOperandConversion(hlo, i)); + } + return Status::OK(); +} + +Status BFloat16ConversionFoldingVisitor::DefaultAction(HloInstruction* hlo) { + // Do not fold BF16 conversions for instructions related to tuples, entry and + // exit of a computation, fusion, convert, and control flow. + if (hlo->opcode() == HloOpcode::kTuple || // + hlo->opcode() == HloOpcode::kGetTupleElement || // + hlo->opcode() == HloOpcode::kInfeed || // + hlo->opcode() == HloOpcode::kOutfeed || // + hlo->opcode() == HloOpcode::kConstant || // + hlo->opcode() == HloOpcode::kParameter || // + hlo->opcode() == HloOpcode::kFusion || // + hlo->opcode() == HloOpcode::kConvert || // + hlo->opcode() == HloOpcode::kCall || // + hlo->opcode() == HloOpcode::kCustomCall || // + hlo->opcode() == HloOpcode::kWhile || // + hlo->opcode() == HloOpcode::kConditional) { + return Status::OK(); + } + if (hlo == computation_->root_instruction() && + !bfloat16_support_->SupportsMixedPrecisions(*hlo)) { + // If hlo is the root instruction, we cannot change its output, so folding + // can only happen when it supports mixed precision so that we can change + // its operands. + return Status::OK(); + } + return TryFoldBF16Conversions(hlo); +} + +StatusOr BFloat16ConversionFolding::Run(HloModule* module) { + XLA_VLOG_LINES( + 2, "BFloat16ConversionFolding::Run(), before:\n" + module->ToString()); + bool changed = false; + for (auto* comp : module->MakeNonfusionComputations()) { + if (BFloat16ConversionFoldingVisitor::Run(comp, bfloat16_support_)) { + changed = true; + } + } + XLA_VLOG_LINES( + 2, "BFloat16ConversionFolding::Run(), after:\n" + module->ToString()); + return changed; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.h b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.h new file mode 100644 index 0000000000..c939838709 --- /dev/null +++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.h @@ -0,0 +1,52 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BFLOAT16_CONVERSION_FOLDING_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_BFLOAT16_CONVERSION_FOLDING_H_ + +#include "tensorflow/compiler/xla/service/bfloat16_support.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { + +// A pass which folds F32 <-> BF16 conversions to their operands or users, when +// it is supported by the backend. +// +// This pass follows the passed-in backend-specific BF16 support rules, but can +// introduce mixed precision in individual HLOs which breaks the assumption of +// some other HLO passes. So it should be used at the end of the HLO +// optimization pipeline followed by a DCE pass. If other passes are needed +// after this pass, run BFloat16MixedPrecisionRemoval first to undo some of the +// changed made by this pass. +class BFloat16ConversionFolding : public HloPassInterface { + public: + explicit BFloat16ConversionFolding(const BFloat16Support* bfloat16_support) + : bfloat16_support_(bfloat16_support) {} + + ~BFloat16ConversionFolding() override = default; + tensorflow::StringPiece name() const override { return "bfloat16-fold"; } + + // Run BF16 conversion folding on the given computation. Returns whether the + // computation was changed. + StatusOr Run(HloModule* module) override; + + private: + const BFloat16Support* bfloat16_support_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_BFLOAT16_CONVERSION_FOLDING_H_ diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc new file mode 100644 index 0000000000..cb37759439 --- /dev/null +++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc @@ -0,0 +1,209 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/bfloat16_conversion_folding.h" +#include "tensorflow/compiler/xla/service/bfloat16_support.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" + +namespace xla { + +class TestBFloat16Support : public BFloat16Support { + public: + TestBFloat16Support() {} + ~TestBFloat16Support() override {} + + bool SupportsBF16Operand(const HloInstruction& hlo, + int64 operand_index) const override { + if (hlo.opcode() == HloOpcode::kAdd || + hlo.opcode() == HloOpcode::kSubtract || + hlo.opcode() == HloOpcode::kTuple || + hlo.opcode() == HloOpcode::kGetTupleElement) { + return true; + } + return false; + } + + bool SupportsBF16Output(const HloInstruction& hlo) const override { + if (hlo.opcode() == HloOpcode::kAdd || + hlo.opcode() == HloOpcode::kSubtract || + hlo.opcode() == HloOpcode::kTuple || + hlo.opcode() == HloOpcode::kGetTupleElement) { + return true; + } + return false; + } + + bool SupportsMixedPrecisions(const HloInstruction& hlo) const override { + if (hlo.opcode() == HloOpcode::kAdd || hlo.opcode() == HloOpcode::kTuple || + hlo.opcode() == HloOpcode::kGetTupleElement) { + return true; + } + return false; + } +}; + +class BFloat16ConversionFoldingTest : public HloTestBase { + protected: + bool FoldConversions(HloModule* module) { + TestBFloat16Support bfloat16_support_; + BFloat16ConversionFolding fold(&bfloat16_support_); + StatusOr result = fold.Run(module); + EXPECT_IS_OK(result.status()); + return result.ValueOrDie(); + } +}; + +TEST_F(BFloat16ConversionFoldingTest, FoldIfSupported) { + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {2, 4}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "a")); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, f32_shape, "b")); + HloInstruction* c = builder.AddInstruction( + HloInstruction::CreateParameter(2, f32_shape, "c")); + + HloInstruction* add0 = builder.AddInstruction( + HloInstruction::CreateBinary(f32_shape, HloOpcode::kAdd, a, b)); + HloInstruction* convert0 = + builder.AddInstruction(HloInstruction::CreateConvert(bf16_shape, add0)); + HloInstruction* convert1 = builder.AddInstruction( + HloInstruction::CreateConvert(f32_shape, convert0)); + + HloInstruction* add1 = builder.AddInstruction( + HloInstruction::CreateBinary(f32_shape, HloOpcode::kAdd, convert1, c)); + builder.AddInstruction(HloInstruction::CreateConvert(bf16_shape, add1)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(FoldConversions(module.get())); + + EXPECT_EQ(computation->root_instruction(), add1); + EXPECT_EQ(add0->shape().element_type(), BF16); + EXPECT_EQ(add1->shape().element_type(), BF16); + EXPECT_EQ(add1->operand(0), add0); +} + +TEST_F(BFloat16ConversionFoldingTest, DoNotFoldIfUnsupported) { + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {2, 4}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "a")); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, f32_shape, "b")); + HloInstruction* c = builder.AddInstruction( + HloInstruction::CreateParameter(2, f32_shape, "c")); + + HloInstruction* mul0 = builder.AddInstruction( + HloInstruction::CreateBinary(f32_shape, HloOpcode::kMultiply, a, b)); + HloInstruction* convert0 = + builder.AddInstruction(HloInstruction::CreateConvert(bf16_shape, mul0)); + HloInstruction* convert1 = builder.AddInstruction( + HloInstruction::CreateConvert(f32_shape, convert0)); + + HloInstruction* mul1 = builder.AddInstruction(HloInstruction::CreateBinary( + f32_shape, HloOpcode::kMultiply, convert1, c)); + HloInstruction* convert2 = + builder.AddInstruction(HloInstruction::CreateConvert(bf16_shape, mul1)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_FALSE(FoldConversions(module.get())); + + EXPECT_EQ(computation->root_instruction(), convert2); + EXPECT_EQ(mul0->shape().element_type(), F32); + EXPECT_EQ(mul1->shape().element_type(), F32); + EXPECT_EQ(mul1->operand(0), convert1); +} + +TEST_F(BFloat16ConversionFoldingTest, DoNotFoldUnsupportedMixedPrecision) { + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {2, 4}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "a")); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, f32_shape, "b")); + HloInstruction* c = builder.AddInstruction( + HloInstruction::CreateParameter(2, f32_shape, "c")); + + HloInstruction* sub0 = builder.AddInstruction( + HloInstruction::CreateBinary(f32_shape, HloOpcode::kSubtract, a, b)); + HloInstruction* convert0 = + builder.AddInstruction(HloInstruction::CreateConvert(bf16_shape, sub0)); + HloInstruction* convert1 = builder.AddInstruction( + HloInstruction::CreateConvert(f32_shape, convert0)); + + HloInstruction* sub1 = builder.AddInstruction(HloInstruction::CreateBinary( + f32_shape, HloOpcode::kSubtract, convert1, c)); + HloInstruction* convert2 = + builder.AddInstruction(HloInstruction::CreateConvert(bf16_shape, sub1)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_FALSE(FoldConversions(module.get())); + + EXPECT_EQ(computation->root_instruction(), convert2); + EXPECT_EQ(sub0->shape().element_type(), F32); + EXPECT_EQ(sub1->shape().element_type(), F32); + EXPECT_EQ(sub1->operand(0), convert1); +} + +TEST_F(BFloat16ConversionFoldingTest, DoNotFoldTuple) { + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {2, 4}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "a")); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_shape, "b")); + HloInstruction* convert0 = + builder.AddInstruction(HloInstruction::CreateConvert(f32_shape, b)); + + HloInstruction* tuple = + builder.AddInstruction(HloInstruction::CreateTuple({a, convert0})); + HloInstruction* gte = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(f32_shape, tuple, 0)); + HloInstruction* convert1 = + builder.AddInstruction(HloInstruction::CreateConvert(bf16_shape, gte)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_FALSE(FoldConversions(module.get())); + + EXPECT_EQ(computation->root_instruction(), convert1); + EXPECT_EQ(gte->shape().element_type(), F32); + EXPECT_EQ(tuple->operand(1), convert0); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.cc b/tensorflow/compiler/xla/service/bfloat16_normalization.cc new file mode 100644 index 0000000000..b032c040e8 --- /dev/null +++ b/tensorflow/compiler/xla/service/bfloat16_normalization.cc @@ -0,0 +1,351 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/bfloat16_normalization.h" + +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +class BFloat16NormalizationVisitor : public DfsHloVisitorWithDefault { + public: + explicit BFloat16NormalizationVisitor(HloComputation* computation, + const BFloat16Support* bfloat16_support) + : computation_(computation), bfloat16_support_(bfloat16_support) {} + + Status DefaultAction(HloInstruction* hlo) override; + + // Special handling for cross-replica-sum which can have a tuple output. + Status HandleCrossReplicaSum(HloInstruction* crs) override; + + static bool Run(HloComputation* computation, + const BFloat16Support* bfloat16_support) { + BFloat16NormalizationVisitor visitor(computation, bfloat16_support); + TF_CHECK_OK(computation->Accept(&visitor)); + return visitor.changed_; + } + + private: + // Checks if the HLO uses BF16 in an unsupported way, and if so, inserts + // conversions between F32 and BF16 to make it supported. + Status HandleInstruction(HloInstruction* hlo); + + // Inserts a conversion HLO that changes the given HLO's output type. + Status InsertConvertAfterOutput(HloInstruction* hlo, PrimitiveType to, + HloComputation* computation); + + // Changes the output type to the specified type, then inserts a conversion + // to the original type. + Status ChangeOutputTypeThenInsertConvertBack(HloInstruction* hlo, + PrimitiveType to, + HloComputation* computation); + + // Inserts a conversion HLO that changes the given HLO's operand type. + Status InsertConvertBeforeOperand(HloInstruction* hlo, int64 operand_idx, + PrimitiveType to, + HloComputation* computation); + + // Inserts conversion HLOs to replace the called computations' BF16 + // operands/outputs to F32. + Status ConvertCalledComputations( + HloInstruction* hlo, + tensorflow::gtl::ArraySlice bf16_called_comps); + + HloComputation* computation_; + const BFloat16Support* bfloat16_support_; + bool changed_ = false; +}; + +Status BFloat16NormalizationVisitor::InsertConvertAfterOutput( + HloInstruction* hlo, PrimitiveType to, HloComputation* computation) { + bool is_root = computation->root_instruction() == hlo; + std::vector materialized_users = hlo->users(); + // Use inst's shape temporarily, in order to pass checks in ReplaceUseWith. + auto convert = computation->AddInstruction( + HloInstruction::CreateConvert(hlo->shape(), hlo)); + for (auto* user : materialized_users) { + TF_RETURN_IF_ERROR(hlo->ReplaceUseWith(user, convert)); + } + if (is_root) { + computation->set_root_instruction(convert); + } + convert->mutable_shape()->set_element_type(to); + changed_ = true; + return Status::OK(); +} + +Status BFloat16NormalizationVisitor::ChangeOutputTypeThenInsertConvertBack( + HloInstruction* hlo, PrimitiveType to, HloComputation* computation) { + auto original_type = hlo->shape().element_type(); + hlo->mutable_shape()->set_element_type(to); + return InsertConvertAfterOutput(hlo, original_type, computation); +} + +Status BFloat16NormalizationVisitor::InsertConvertBeforeOperand( + HloInstruction* hlo, int64 operand_idx, PrimitiveType to, + HloComputation* computation) { + auto operand = hlo->mutable_operand(operand_idx); + auto convert = computation->AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::ChangeElementType(operand->shape(), to), operand)); + TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(operand_idx, convert)); + changed_ = true; + return Status::OK(); +} + +Status BFloat16NormalizationVisitor::ConvertCalledComputations( + HloInstruction* hlo, + tensorflow::gtl::ArraySlice bf16_called_comps) { + std::map cloned_computations; + for (auto& comp : bf16_called_comps) { + auto cloned = comp->parent()->AddEmbeddedComputation(comp->Clone()); + cloned_computations[comp] = cloned; + changed_ = true; + } + hlo->ReplaceCalledComputations([&](HloComputation* comp) { + auto it = cloned_computations.find(comp); + if (it != cloned_computations.end()) { + return it->second; + } + return comp; + }); + for (auto& comp_pair : cloned_computations) { + auto comp = comp_pair.second; + if (comp->root_instruction()->shape().element_type() == BF16) { + TF_RETURN_IF_ERROR( + InsertConvertAfterOutput(comp->root_instruction(), F32, comp)); + } + for (auto* param : comp->parameter_instructions()) { + if (param->shape().element_type() == BF16) { + // This changes the parameter to F32 then inserts a convert after it. + TF_RETURN_IF_ERROR( + ChangeOutputTypeThenInsertConvertBack(param, F32, comp)); + } + } + } + return Status::OK(); +} + +Status BFloat16NormalizationVisitor::HandleCrossReplicaSum( + HloInstruction* crs) { + if (!ShapeUtil::IsTuple(crs->shape())) { + return HandleInstruction(crs); + } + + std::vector operand_types(crs->operand_count()); + std::vector output_types(crs->operand_count()); + bool has_f32 = false; + bool has_bf16 = false; + bool has_bf16_output = false; + for (int64 i = 0; i < crs->operand_count(); ++i) { + operand_types[i] = crs->operand(i)->shape().element_type(); + output_types[i] = ShapeUtil::GetSubshape(crs->shape(), {i}).element_type(); + if (operand_types[i] == F32 || output_types[i] == F32) { + has_f32 = true; + } else if (operand_types[i] == BF16) { + has_bf16 = true; + } + if (output_types[i] == BF16) { + has_bf16 = true; + has_bf16_output = true; + } + } + + for (int64 i = 0; i < crs->operand_count(); ++i) { + if (operand_types[i] != BF16) { + continue; + } + if (bfloat16_support_->SupportsBF16Operand(*crs, i) && + (bfloat16_support_->SupportsMixedPrecisions(*crs) || !has_f32)) { + continue; + } + TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(crs, i, F32, computation_)); + has_f32 = true; + } + + if (!has_bf16_output) { + return Status::OK(); + } + + if (bfloat16_support_->SupportsBF16Output(*crs) && + (bfloat16_support_->SupportsMixedPrecisions(*crs) || !has_f32)) { + return Status::OK(); + } + + std::vector output_elements(crs->operand_count()); + auto original_shape = crs->shape(); + for (int64 i = 0; i < crs->operand_count(); ++i) { + auto subshape = ShapeUtil::GetMutableSubshape(crs->mutable_shape(), {i}); + if (output_types[i] != BF16) { + output_elements[i] = computation_->AddInstruction( + HloInstruction::CreateGetTupleElement(*subshape, crs, i)); + continue; + } + subshape->set_element_type(F32); + auto gte = computation_->AddInstruction( + HloInstruction::CreateGetTupleElement(*subshape, crs, i)); + output_elements[i] = + computation_->AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::ChangeElementType(*subshape, BF16), gte)); + } + auto tuple = computation_->AddInstruction( + HloInstruction::CreateTuple(output_elements)); + + std::vector materialized_users = crs->users(); + // Use the crs' shape temporarily, in order to pass checks in + // ReplaceUseWith. + *tuple->mutable_shape() = crs->shape(); + for (auto* user : materialized_users) { + TF_RETURN_IF_ERROR(crs->ReplaceUseWith(user, tuple)); + } + *tuple->mutable_shape() = original_shape; + return Status::OK(); +} + +Status BFloat16NormalizationVisitor::HandleInstruction(HloInstruction* hlo) { + std::vector bf16_operands; + std::vector f32_operands; + bool has_f32 = false; + bool has_bf16 = false; + + for (int64 i = 0; i < hlo->operand_count(); ++i) { + if (hlo->operand(i)->shape().element_type() == F32) { + f32_operands.push_back(i); + has_f32 = true; + } else if (hlo->operand(i)->shape().element_type() == BF16) { + bf16_operands.push_back(i); + has_bf16 = true; + } + } + + if (hlo->shape().element_type() == F32) { + has_f32 = true; + } else if (hlo->shape().element_type() == BF16) { + has_bf16 = true; + } + + std::vector bf16_called_comps; + for (auto* comp : hlo->called_computations()) { + bool comp_has_bf16 = false; + if (comp->root_instruction()->shape().element_type() == F32) { + has_f32 = true; + } else if (comp->root_instruction()->shape().element_type() == BF16) { + has_bf16 = true; + comp_has_bf16 = true; + } + for (auto* param : comp->parameter_instructions()) { + if (param->shape().element_type() == F32) { + has_f32 = true; + } else if (param->shape().element_type() == BF16) { + has_bf16 = true; + comp_has_bf16 = true; + } + } + if (comp_has_bf16) { + bf16_called_comps.push_back(comp); + } + } + + if (!bfloat16_support_->SupportsMixedPrecisions(*hlo) && has_bf16 && + has_f32) { + // Resolve unsupported mixed precision. + // + // See if we can change everything to BF16. + if (hlo->called_computations().empty() && + hlo->shape().element_type() == BF16) { + bool can_use_bf16 = true; + for (int i : f32_operands) { + if (bfloat16_support_->EffectiveOperandPrecisionIsOutputPrecision(*hlo, + i) && + bfloat16_support_->SupportsBF16Operand(*hlo, i)) { + continue; + } + can_use_bf16 = false; + break; + } + if (can_use_bf16) { + for (int i : f32_operands) { + TF_RETURN_IF_ERROR( + InsertConvertBeforeOperand(hlo, i, BF16, computation_)); + } + return Status::OK(); + } + } + if (hlo->shape().element_type() == BF16) { + TF_RETURN_IF_ERROR( + ChangeOutputTypeThenInsertConvertBack(hlo, F32, computation_)); + } + for (int i : bf16_operands) { + TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(hlo, i, F32, computation_)); + } + return ConvertCalledComputations(hlo, bf16_called_comps); + } + + for (int i : bf16_operands) { + if (!bfloat16_support_->SupportsBF16Operand(*hlo, i)) { + TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(hlo, i, F32, computation_)); + } + } + + if (hlo->shape().element_type() == BF16 && + !bfloat16_support_->SupportsBF16Output(*hlo)) { + TF_RETURN_IF_ERROR( + ChangeOutputTypeThenInsertConvertBack(hlo, F32, computation_)); + } + + return Status::OK(); +} + +Status BFloat16NormalizationVisitor::DefaultAction(HloInstruction* hlo) { + // Do not change instructions related to entry and exit of a computation, + // tuples, fusion, convert, and control flow. + if (hlo->opcode() == HloOpcode::kTuple || // + hlo->opcode() == HloOpcode::kGetTupleElement || // + hlo->opcode() == HloOpcode::kInfeed || // + hlo->opcode() == HloOpcode::kOutfeed || // + hlo->opcode() == HloOpcode::kConstant || // + hlo->opcode() == HloOpcode::kParameter || // + hlo->opcode() == HloOpcode::kFusion || // + hlo->opcode() == HloOpcode::kConvert || // + hlo->opcode() == HloOpcode::kCall || // + hlo->opcode() == HloOpcode::kCustomCall || // + hlo->opcode() == HloOpcode::kWhile || // + hlo->opcode() == HloOpcode::kConditional) { + return Status::OK(); + } + return HandleInstruction(hlo); +} + +StatusOr BFloat16Normalization::Run(HloModule* module) { + XLA_VLOG_LINES( + 2, "BFloat16Normalization::Run(), before:\n" + module->ToString()); + bool changed = false; + for (auto* comp : module->MakeComputationPostOrder()) { + if (BFloat16NormalizationVisitor::Run(comp, bfloat16_support_)) { + changed = true; + } + } + XLA_VLOG_LINES(2, + "BFloat16Normalization::Run(), after:\n" + module->ToString()); + return changed; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.h b/tensorflow/compiler/xla/service/bfloat16_normalization.h new file mode 100644 index 0000000000..2a60fe0af3 --- /dev/null +++ b/tensorflow/compiler/xla/service/bfloat16_normalization.h @@ -0,0 +1,92 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BFLOAT16_NORMALIZATION_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_BFLOAT16_NORMALIZATION_H_ + +#include "tensorflow/compiler/xla/service/bfloat16_support.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { + +// A pass which adds F32 <-> BF16 conversions for HLO instructions that do not +// support BF16 input/output or mixed precision, according to the passed-in +// backend-specific BF16 support rules. +class BFloat16Normalization : public HloPassInterface { + public: + explicit BFloat16Normalization(const BFloat16Support* bfloat16_support) + : bfloat16_support_(bfloat16_support) {} + + ~BFloat16Normalization() override = default; + tensorflow::StringPiece name() const override { return "bf16-normalization"; } + + // Run BF16 normalization on the given computation. Returns whether the + // computation was changed. + StatusOr Run(HloModule* module) override; + + private: + const BFloat16Support* bfloat16_support_; +}; + +// A pass that unconditionally removes the mixed F32/BF16 uses in HLO +// instructions (excluding convert) by adding F32 <-> BF16 conversions. Unlike +// BFloat16Normalization, this pass does not use a backend-specific +// BFloat16Support, and does not change HLOs that have BF16 data if they do not +// use mixed precision; it removes mixed precision even if the backend supports +// it. This pass is used to make the HLO module valid for other HLO passes which +// do not support mixed precision. +class BFloat16MixedPrecisionRemoval : public HloPassInterface { + public: + BFloat16MixedPrecisionRemoval() {} + + ~BFloat16MixedPrecisionRemoval() override = default; + + tensorflow::StringPiece name() const override { + return "bf16-mixed-precision-removal"; + } + + // Run mixed precision removal on the given computation. Returns whether the + // computation was changed. + StatusOr Run(HloModule* module) override { + BFloat16Normalization normalization(&no_mixed_precision_support_); + return normalization.Run(module); + } + + private: + class BFloat16SupportForMixedPrecisionRemoval : public BFloat16Support { + public: + BFloat16SupportForMixedPrecisionRemoval() {} + + ~BFloat16SupportForMixedPrecisionRemoval() override = default; + + bool SupportsBF16Operand(const HloInstruction& hlo, + int64 operand_index) const override { + return true; + } + + bool SupportsBF16Output(const HloInstruction& hlo) const override { + return true; + } + + bool SupportsMixedPrecisions(const HloInstruction& hlo) const override { + return false; + } + } no_mixed_precision_support_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_BFLOAT16_NORMALIZATION_H_ diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc new file mode 100644 index 0000000000..66c3085842 --- /dev/null +++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc @@ -0,0 +1,248 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/bfloat16_normalization.h" +#include "tensorflow/compiler/xla/service/bfloat16_support.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/test_helpers.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" + +namespace xla { + +class TestBFloat16Support : public BFloat16Support { + public: + TestBFloat16Support() {} + ~TestBFloat16Support() override {} + + bool SupportsBF16Operand(const HloInstruction& hlo, + int64 operand_index) const override { + if (hlo.opcode() == HloOpcode::kAdd || + hlo.opcode() == HloOpcode::kSubtract || + hlo.opcode() == HloOpcode::kReduce || + hlo.opcode() == HloOpcode::kTuple || + hlo.opcode() == HloOpcode::kGetTupleElement) { + return true; + } + return false; + } + + bool SupportsBF16Output(const HloInstruction& hlo) const override { + if (hlo.opcode() == HloOpcode::kAdd || hlo.opcode() == HloOpcode::kReduce || + hlo.opcode() == HloOpcode::kSubtract || + hlo.opcode() == HloOpcode::kTuple || + hlo.opcode() == HloOpcode::kGetTupleElement) { + return true; + } + return false; + } + + bool SupportsMixedPrecisions(const HloInstruction& hlo) const override { + if (hlo.opcode() == HloOpcode::kAdd || hlo.opcode() == HloOpcode::kTuple || + hlo.opcode() == HloOpcode::kGetTupleElement) { + return true; + } + return false; + } +}; + +class BFloat16NormalizationTest : public HloTestBase { + protected: + bool Normalize(HloModule* module) { + TestBFloat16Support bfloat16_support_; + BFloat16Normalization normalization(&bfloat16_support_); + StatusOr result = normalization.Run(module); + EXPECT_IS_OK(result.status()); + return result.ValueOrDie(); + } +}; + +TEST_F(BFloat16NormalizationTest, NoopIfSupported) { + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {2, 4}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "a")); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_shape, "b")); + HloInstruction* c = builder.AddInstruction( + HloInstruction::CreateParameter(2, f32_shape, "c")); + + HloInstruction* add0 = builder.AddInstruction( + HloInstruction::CreateBinary(bf16_shape, HloOpcode::kAdd, a, b)); + + HloInstruction* add1 = builder.AddInstruction( + HloInstruction::CreateBinary(f32_shape, HloOpcode::kAdd, add0, c)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_FALSE(Normalize(module.get())); + + EXPECT_EQ(computation->root_instruction(), add1); + EXPECT_EQ(add0->shape().element_type(), BF16); + EXPECT_EQ(add1->shape().element_type(), F32); +} + +TEST_F(BFloat16NormalizationTest, ResolveIfUnsupportedBF16) { + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {2, 4}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "a")); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_shape, "b")); + HloInstruction* c = builder.AddInstruction( + HloInstruction::CreateParameter(2, f32_shape, "c")); + + HloInstruction* mul0 = builder.AddInstruction( + HloInstruction::CreateBinary(bf16_shape, HloOpcode::kMultiply, a, b)); + + HloInstruction* mul1 = builder.AddInstruction( + HloInstruction::CreateBinary(bf16_shape, HloOpcode::kMultiply, mul0, c)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(Normalize(module.get())); + + EXPECT_EQ(computation->root_instruction()->opcode(), HloOpcode::kConvert); + EXPECT_EQ(computation->root_instruction()->operand(0), mul1); + EXPECT_EQ(mul0->shape().element_type(), F32); + EXPECT_EQ(mul1->shape().element_type(), F32); + EXPECT_EQ(mul1->operand(0)->opcode(), HloOpcode::kConvert); +} + +TEST_F(BFloat16NormalizationTest, ResolveUnsupportedMixedPrecisionSubtraction) { + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {2, 4}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "a")); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_shape, "b")); + HloInstruction* c = builder.AddInstruction( + HloInstruction::CreateParameter(2, f32_shape, "c")); + + HloInstruction* sub0 = builder.AddInstruction( + HloInstruction::CreateBinary(bf16_shape, HloOpcode::kSubtract, a, b)); + + HloInstruction* sub1 = builder.AddInstruction( + HloInstruction::CreateBinary(bf16_shape, HloOpcode::kSubtract, sub0, c)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(Normalize(module.get())); + + EXPECT_EQ(computation->root_instruction()->opcode(), HloOpcode::kConvert); + EXPECT_EQ(computation->root_instruction()->operand(0), sub1); + EXPECT_EQ(sub0->shape().element_type(), F32); + EXPECT_EQ(sub1->shape().element_type(), F32); + EXPECT_EQ(sub1->operand(0)->opcode(), HloOpcode::kConvert); +} + +TEST_F(BFloat16NormalizationTest, ResolveUnsupportedMixedPrecisionReduce) { + Shape f32_input_shape = ShapeUtil::MakeShape(F32, {2, 4}); + Shape f32_output_shape = ShapeUtil::MakeShape(F32, {4}); + + Shape bf16_scalar_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + + auto reduce_comp_builder = HloComputation::Builder("reduce_comp"); + auto reduce_comp_param0 = reduce_comp_builder.AddInstruction( + HloInstruction::CreateParameter(0, bf16_scalar_shape, "param0")); + auto reduce_comp_param1 = reduce_comp_builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_scalar_shape, "param1")); + reduce_comp_builder.AddInstruction( + HloInstruction::CreateBinary(bf16_scalar_shape, HloOpcode::kAdd, + reduce_comp_param0, reduce_comp_param1)); + + auto module = CreateNewModule(); + auto reduce_computation = + module->AddEmbeddedComputation(reduce_comp_builder.Build()); + + auto builder = HloComputation::Builder(TestName()); + HloInstruction* input = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_input_shape, "a")); + HloInstruction* init = builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_scalar_shape, "init")); + HloInstruction* reduce = builder.AddInstruction(HloInstruction::CreateReduce( + f32_output_shape, input, init, {0}, reduce_computation)); + + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(Normalize(module.get())); + + EXPECT_EQ(computation->root_instruction(), reduce); + EXPECT_EQ(reduce->called_computations().size(), 1); + EXPECT_EQ(reduce->called_computations()[0]->num_parameters(), 2); + EXPECT_EQ(reduce->called_computations()[0] + ->parameter_instruction(0) + ->shape() + .element_type(), + F32); + EXPECT_EQ(reduce->called_computations()[0] + ->parameter_instruction(1) + ->shape() + .element_type(), + F32); + EXPECT_EQ(reduce->called_computations()[0] + ->root_instruction() + ->shape() + .element_type(), + F32); + EXPECT_EQ(reduce->shape().element_type(), F32); + EXPECT_EQ(reduce->operand(0), input); + EXPECT_EQ(input->shape().element_type(), F32); + EXPECT_EQ(reduce->operand(1)->opcode(), HloOpcode::kConvert); + EXPECT_EQ(reduce->operand(1)->shape().element_type(), F32); +} + +TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleCrossReplicaSum) { + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {2, 4}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "a")); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_shape, "b")); + + HloInstruction* crs = + builder.AddInstruction(HloInstruction::CreateCrossReplicaSum( + ShapeUtil::MakeTupleShape({f32_shape, bf16_shape}), {a, b})); + HloInstruction* gte = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(bf16_shape, crs, 1)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(Normalize(module.get())); + + EXPECT_EQ(computation->root_instruction(), gte); + EXPECT_EQ(gte->shape().element_type(), BF16); + EXPECT_EQ(crs->operand(1)->shape().element_type(), F32); + EXPECT_EQ(ShapeUtil::GetSubshape(crs->shape(), {1}).element_type(), F32); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/bfloat16_support.cc b/tensorflow/compiler/xla/service/bfloat16_support.cc new file mode 100644 index 0000000000..3fd9e24601 --- /dev/null +++ b/tensorflow/compiler/xla/service/bfloat16_support.cc @@ -0,0 +1,111 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/bfloat16_support.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" + +namespace xla { + +bool BFloat16Support::SupportsBF16Operand(const HloInstruction& hlo, + int64 operand_index) const { + switch (hlo.opcode()) { + case HloOpcode::kCall: + case HloOpcode::kConditional: + case HloOpcode::kCustomCall: + case HloOpcode::kGetTupleElement: + case HloOpcode::kTuple: + case HloOpcode::kWhile: + return true; + case HloOpcode::kConvert: + CHECK_EQ(operand_index, 0); + return hlo.operand(0)->shape().element_type() == BF16; + default: + break; + } + return false; +} + +bool BFloat16Support::SupportsBF16Output(const HloInstruction& hlo) const { + switch (hlo.opcode()) { + case HloOpcode::kCall: + case HloOpcode::kConditional: + case HloOpcode::kCustomCall: + case HloOpcode::kGetTupleElement: + case HloOpcode::kTuple: + case HloOpcode::kWhile: + return true; + case HloOpcode::kConvert: + return hlo.shape().element_type() == BF16; + default: + break; + } + return false; +} + +bool BFloat16Support::SupportsMixedPrecisions(const HloInstruction& hlo) const { + switch (hlo.opcode()) { + case HloOpcode::kCall: + case HloOpcode::kConditional: + case HloOpcode::kConvert: + case HloOpcode::kCustomCall: + case HloOpcode::kGetTupleElement: + case HloOpcode::kTuple: + case HloOpcode::kWhile: + return true; + default: + break; + } + return false; +} + +/* static */ +bool BFloat16Support::EffectiveOperandPrecisionIsOutputPrecision( + const HloInstruction& hlo, int64 operand_index) { + switch (hlo.opcode()) { + case HloOpcode::kAbs: + case HloOpcode::kBroadcast: + case HloOpcode::kClamp: + case HloOpcode::kConcatenate: + case HloOpcode::kCopy: + case HloOpcode::kGetTupleElement: + case HloOpcode::kMaximum: + case HloOpcode::kMinimum: + case HloOpcode::kPad: + case HloOpcode::kReshape: + case HloOpcode::kReverse: + case HloOpcode::kSlice: + case HloOpcode::kSort: + case HloOpcode::kTranspose: + case HloOpcode::kTuple: + return true; + case HloOpcode::kDynamicSlice: + return operand_index == 0; + case HloOpcode::kDynamicUpdateSlice: + return operand_index == 0 || operand_index == 1; + case HloOpcode::kSelect: + return operand_index == 1 || operand_index == 2; + default: + break; + } + return false; +} + +bool BFloat16Support::EffectiveOperandPrecisionIsBF16( + const HloInstruction& hlo, int64 operand_index) const { + return false; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/bfloat16_support.h b/tensorflow/compiler/xla/service/bfloat16_support.h new file mode 100644 index 0000000000..29f662d22b --- /dev/null +++ b/tensorflow/compiler/xla/service/bfloat16_support.h @@ -0,0 +1,60 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BFLOAT16_SUPPORT_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_BFLOAT16_SUPPORT_H_ + +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" + +namespace xla { + +class BFloat16Support { + public: + BFloat16Support() {} + virtual ~BFloat16Support() {} + + // Returns whether the backend supports BF16 operand for the HLO instruction + // at the given index. + virtual bool SupportsBF16Operand(const HloInstruction& hlo, + int64 operand_index) const; + + // Returns whether the backend supports BF16 output for the HLO instruction. + virtual bool SupportsBF16Output(const HloInstruction& hlo) const; + + // Returns whether the backend support mixed precision: the operands, output, + // and parameters/output of the called computations can have different + // precisions (BF16 and F32). + virtual bool SupportsMixedPrecisions(const HloInstruction& hlo) const; + + // Returns whether the given HLO inherits its BF16 operand precision at the + // given index, so even if the output is F32, elements in the output that + // depend on the BF16 operand will still have BF16 effective precision even if + // they have F32 format. Similarly, this also means if the output is BF16 then + // increasing the operand precision from BF16 to F32 will not change the + // output. This typically includes HLOs that pass elements from the operand to + // the output without arithmetic operations. + static bool EffectiveOperandPrecisionIsOutputPrecision( + const HloInstruction& hlo, int64 operand_index); + + // Returns if the backend only uses BF16 precision for the operand at the + // specified index, even if the operand is F32. + virtual bool EffectiveOperandPrecisionIsBF16(const HloInstruction& hlo, + int64 operand_index) const; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_BFLOAT16_SUPPORT_H_ diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 0e4437b73b..0981f1f4fe 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1805,7 +1805,8 @@ void HloInstruction::RemoveUser(HloInstruction* user) { Status HloInstruction::ReplaceUseWith(HloInstruction* user, HloInstruction* new_producer) { - TF_RET_CHECK(ShapeUtil::Compatible(shape(), new_producer->shape())) + TF_RET_CHECK( + ShapeUtil::CompatibleIgnoringFpPrecision(shape(), new_producer->shape())) << "this shape: " << ShapeUtil::HumanString(shape()) << ", replacement shape: " << ShapeUtil::HumanString(new_producer->shape()); @@ -1828,8 +1829,8 @@ Status HloInstruction::ReplaceOperandWith(int64 operand_num, TF_RET_CHECK(operand_num >= 0); TF_RET_CHECK(operand_num < operand_count()); HloInstruction* old_operand = mutable_operand(operand_num); - TF_RET_CHECK( - ShapeUtil::Compatible(old_operand->shape(), new_operand->shape())) + TF_RET_CHECK(ShapeUtil::CompatibleIgnoringFpPrecision(old_operand->shape(), + new_operand->shape())) << old_operand->shape().ShortDebugString() << " is not compatible with " << new_operand->shape().ShortDebugString(); operands_[operand_num] = new_operand; diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 04d4656546..e2b3bb9d71 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include + #include "tensorflow/compiler/xla/service/hlo_verifier.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/core/lib/core/errors.h" @@ -164,6 +166,8 @@ Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) { // HLO broadcast has no exact analog at the proto level so there is no // ShapeInference method. Check the output shape explicitly. const Shape& operand_shape = broadcast->operand(0)->shape(); + // Check for mixed precision. + TF_RETURN_IF_ERROR(CheckShape(broadcast, broadcast->shape())); TF_RET_CHECK(ShapeUtil::Rank(operand_shape) == broadcast->dimensions().size()); for (int64 operand_dimension = 0; @@ -178,6 +182,8 @@ Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) { } Status ShapeVerifier::HandleReshape(HloInstruction* reshape) { + // Check for mixed precision. + TF_RETURN_IF_ERROR(CheckShape(reshape, reshape->shape())); TF_RET_CHECK(ShapeUtil::ElementsIn(reshape->shape()) == ShapeUtil::ElementsIn(reshape->operand(0)->shape())); return tensorflow::Status::OK(); @@ -359,13 +365,122 @@ Status ShapeVerifier::HandleBatchNormGrad(HloInstruction* batch_norm_grad) { batch_norm_grad->feature_index())); } +namespace { + +// Checks that the instruction does not have mixed precision floating point +// inputs. +Status CheckMixedPrecisionOperands(const HloInstruction* instruction) { + switch (instruction->opcode()) { + // White list the following opcodes for mixed-precision check, because they + // involve data pass through or grouping via tuples, where the precisions + // of buffers can be different. + case HloOpcode::kCall: + case HloOpcode::kConditional: + case HloOpcode::kConstant: + case HloOpcode::kCrossReplicaSum: + case HloOpcode::kCustomCall: + case HloOpcode::kFusion: + case HloOpcode::kGetTupleElement: + case HloOpcode::kInfeed: + case HloOpcode::kOutfeed: + case HloOpcode::kParameter: + case HloOpcode::kRecv: + case HloOpcode::kRecvDone: + case HloOpcode::kReducePrecision: + case HloOpcode::kSelect: + case HloOpcode::kSend: + case HloOpcode::kSendDone: + case HloOpcode::kTuple: + case HloOpcode::kWhile: + break; + default: { + PrimitiveType fp_type = PRIMITIVE_TYPE_INVALID; + for (auto operand : instruction->operands()) { + TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus( + operand->shape(), + [&](const Shape& subshape, const ShapeIndex& index) { + if (!ShapeUtil::ElementIsFloating(subshape)) { + return Status::OK(); + } + if (fp_type == PRIMITIVE_TYPE_INVALID) { + fp_type = subshape.element_type(); + } else if (fp_type != subshape.element_type()) { + return FailedPrecondition( + "Seen floating point types of different precisions in " + "%s, but mixed precision is disallowed.", + instruction->ToString().c_str()); + } + return Status::OK(); + })); + } + } + } + return Status::OK(); +} + +} // namespace + Status ShapeVerifier::CheckShape(const HloInstruction* instruction, - const Shape& expected_shape) { - if (!ShapeUtil::Compatible(instruction->shape(), expected_shape)) { + const Shape& inferred_shape) { + // If allow_mixed_precision_ is false, check if there are operands with + // different precisions. We need this check because ShapeInference allows + // mixed precision inputs. + if (!allow_mixed_precision_) { + TF_RETURN_IF_ERROR(CheckMixedPrecisionOperands(instruction)); + } + + // Check if the output shape matches the expected shape. + bool compatible; + // We treat BF16 and F32 as compatible types if mixed precision is allowed, + // but only when the instruction defines the BF16/F32 buffer. + switch (instruction->opcode()) { + case HloOpcode::kSelect: + if (ShapeUtil::IsTuple(inferred_shape) || !allow_mixed_precision_) { + // Select only defines the top-level buffer, which in this case is the + // tuple, so we cannot allow mixed precision. + compatible = + ShapeUtil::Compatible(instruction->shape(), inferred_shape); + } else { + compatible = ShapeUtil::CompatibleIgnoringFpPrecision( + instruction->shape(), inferred_shape); + } + break; + case HloOpcode::kGetTupleElement: + case HloOpcode::kTuple: + // Tuple and GetTupleElement do not define BF16/F32 buffers, so mixed + // precision is disallowed. + case HloOpcode::kConstant: + case HloOpcode::kBitcast: + case HloOpcode::kBitcastConvert: + case HloOpcode::kCall: + case HloOpcode::kConditional: + case HloOpcode::kConvert: + case HloOpcode::kCustomCall: + case HloOpcode::kInfeed: + case HloOpcode::kOutfeed: + case HloOpcode::kParameter: + case HloOpcode::kRecv: + case HloOpcode::kRecvDone: + case HloOpcode::kSend: + case HloOpcode::kSendDone: + case HloOpcode::kWhile: + // The above opcodes should match the expected shapes exactly. + compatible = ShapeUtil::Compatible(instruction->shape(), inferred_shape); + break; + default: + if (allow_mixed_precision_) { + compatible = ShapeUtil::CompatibleIgnoringFpPrecision( + instruction->shape(), inferred_shape); + } else { + compatible = + ShapeUtil::Compatible(instruction->shape(), inferred_shape); + } + } + if (!compatible) { return InvalidArgument( "Expected instruction to have shape compatible with %s, actual " "shape is %s:\n%s", - ShapeUtil::HumanString(expected_shape).c_str(), + ShapeUtil::HumanString(inferred_shape).c_str(), ShapeUtil::HumanString(instruction->shape()).c_str(), instruction->ToString().c_str()); } @@ -373,14 +488,14 @@ Status ShapeVerifier::CheckShape(const HloInstruction* instruction, } Status ShapeVerifier::CheckShape(const HloInstruction* instruction, - const StatusOr& expected_shape_status) { - if (!expected_shape_status.ok()) { - Status s = expected_shape_status.status(); + const StatusOr& inferred_shape_status) { + if (!inferred_shape_status.ok()) { + Status s = inferred_shape_status.status(); tensorflow::errors::AppendToMessage(&s, ", for instruction ", instruction->ToString()); return s; } - return CheckShape(instruction, expected_shape_status.ValueOrDie()); + return CheckShape(instruction, inferred_shape_status.ValueOrDie()); } Status ShapeVerifier::CheckUnaryShape(const HloInstruction* instruction) { diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h index 26d53dec1e..7eccf834bb 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.h +++ b/tensorflow/compiler/xla/service/hlo_verifier.h @@ -27,6 +27,10 @@ namespace xla { // TODO(b/26024837): Check output shape for all instruction types. class ShapeVerifier : public DfsHloVisitor { public: + explicit ShapeVerifier() : allow_mixed_precision_(false) {} + explicit ShapeVerifier(bool allow_mixed_precision) + : allow_mixed_precision_(allow_mixed_precision) {} + Status HandleElementwiseUnary(HloInstruction* hlo) override; Status HandleElementwiseBinary(HloInstruction* hlo) override; Status HandleClamp(HloInstruction* clamp) override; @@ -81,14 +85,14 @@ class ShapeVerifier : public DfsHloVisitor { } protected: - // Check the instruction's shape against the given expected shape and return - // an appropriate error if there is a mismatch. + // Check the instruction's shape against the shape given by ShapeInference + // and return an appropriate error if there is a mismatch. Status CheckShape(const HloInstruction* instruction, - const Shape& expected_shape); + const Shape& inferred_shape); // Overload which takes a StatusOr to reduce boilerplate in the caller. Status CheckShape(const HloInstruction* instruction, - const StatusOr& expected_shape_status); + const StatusOr& inferred_shape_status); // Check a unary (binary, etc) instruction's shape against the inferred shape. Status CheckUnaryShape(const HloInstruction* instruction); @@ -99,19 +103,32 @@ class ShapeVerifier : public DfsHloVisitor { // Checks if the given two instructions shares the same channel id. Status CheckSameChannel(const HloInstruction* instr1, const HloInstruction* instr2); + + private: + // Whether the inputs and output of an instruction can contain both F32s and + // BF16s. Tuples that include both F32s and BF16s are allowed regardless of + // this flag. + bool allow_mixed_precision_; }; // HLO pass that verifies invariants of HLO instructions for each computation in // the module. class HloVerifier : public HloPassInterface { public: + using ShapeVerifierFactory = std::function()>; + // Uses standard shape inference. explicit HloVerifier() - : shape_verifier_factory_([] { return MakeUnique(); }) {} + : shape_verifier_factory_( + [] { return MakeUnique(false); }) {} + + explicit HloVerifier(bool allow_mixed_precision) + : shape_verifier_factory_([allow_mixed_precision] { + return MakeUnique(allow_mixed_precision); + }) {} // Uses custom shape verification. - explicit HloVerifier( - std::function()> shape_verifier_factory) + explicit HloVerifier(ShapeVerifierFactory shape_verifier_factory) : shape_verifier_factory_(std::move(shape_verifier_factory)) {} ~HloVerifier() override = default; @@ -129,7 +146,7 @@ class HloVerifier : public HloPassInterface { // expectations. This is a factory function because ShapeVerifier, Note that // ShapeVerifier, being a DfsHloVisitor, is stateful. We want a clean object // for each run of the verifier. - std::function()> shape_verifier_factory_; + ShapeVerifierFactory shape_verifier_factory_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 4ba6da6ccc..004889b5f2 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -209,7 +209,8 @@ tensorflow::Status VerifyReducerShape(const ProgramShape& reducer_shape, } // Check that init_value's shape is suitable for reducer_shape. - if (!ShapeUtil::Compatible(accumulator_shape, init_value_shape)) { + if (!ShapeUtil::CompatibleIgnoringFpPrecision(accumulator_shape, + init_value_shape)) { return InvalidArgument( "Reduction function's accumulator shape differs from the " "init_value shape: %s vs %s", @@ -220,8 +221,8 @@ tensorflow::Status VerifyReducerShape(const ProgramShape& reducer_shape, // Check that the inputs can be passed in as the second argument. const Shape& input_element_shape = ShapeUtil::MakeShape(input_element_type, {}); - if (!ShapeUtil::Compatible(input_element_shape, - reducer_shape.parameters(1))) { + if (!ShapeUtil::CompatibleIgnoringFpPrecision(input_element_shape, + reducer_shape.parameters(1))) { return InvalidArgument( "Reduction function's second parameter shape differs from the " "input type element type: %s vs %s", @@ -231,7 +232,8 @@ tensorflow::Status VerifyReducerShape(const ProgramShape& reducer_shape, // Currently the accumulator and inputs must be the same type, // though that restriction could be relaxed. - if (!ShapeUtil::Compatible(accumulator_shape, reducer_shape.parameters(1))) { + if (!ShapeUtil::CompatibleIgnoringFpPrecision(accumulator_shape, + reducer_shape.parameters(1))) { return InvalidArgument( "Reduction function's second parameter shape currently must " "match the result shape. Got %s vs %s", @@ -394,11 +396,13 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, dimension); } const Shape* arg_shape = nullptr; + PrimitiveType element_type = PRIMITIVE_TYPE_INVALID; for (const Shape* shape : arg_shapes) { TF_RETURN_IF_ERROR( ExpectNotTupleOrOpaque(*shape, "operand of concatenation")); if (!arg_shape) { arg_shape = shape; + element_type = arg_shape->element_type(); continue; } if (ShapeUtil::Rank(*arg_shape) != ShapeUtil::Rank(*shape)) { @@ -409,7 +413,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, ShapeUtil::HumanString(*arg_shape).c_str(), ShapeUtil::Rank(*shape), ShapeUtil::HumanString(*shape).c_str()); } - if (arg_shape->element_type() != shape->element_type()) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(*arg_shape, *shape)) { return InvalidArgument( "cannot concatenate arrays with different element types: %s vs %s", PrimitiveType_Name(arg_shape->element_type()).c_str(), @@ -431,6 +435,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, ShapeUtil::HumanString(*shape).c_str(), dimension); } } + element_type = ShapeUtil::HigherPrecisionElementType(*shape, *arg_shape); } std::vector new_dimensions(arg_shape->dimensions().begin(), @@ -438,7 +443,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, for (size_t i = 1; i < arg_shapes.size(); ++i) { new_dimensions[dimension] += arg_shapes[i]->dimensions(dimension); } - return ShapeUtil::MakeShape(arg_shape->element_type(), new_dimensions); + return ShapeUtil::MakeShape(element_type, new_dimensions); } /* static */ StatusOr ShapeInference::InferConvertShape( @@ -536,7 +541,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, ShapeUtil::HumanString(operand_shape).c_str(), padding_config.ShortDebugString().c_str()); } - if (operand_shape.element_type() != padding_value_shape.element_type()) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(operand_shape, + padding_value_shape)) { return InvalidArgument( "the element types of the operands to pad do not match"); } @@ -548,7 +554,9 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, std::max(operand_shape.dimensions(i) - 1, 0LL) * padding_config.dimensions(i).interior_padding(); } - return ShapeUtil::MakeShape(operand_shape.element_type(), dimensions); + return ShapeUtil::MakeShape( + ShapeUtil::HigherPrecisionElementType(operand_shape, padding_value_shape), + dimensions); } // Current DotDimensionNumbers Requirements: @@ -673,7 +681,7 @@ Status ValidateDotDimensionNumbers( }; // Check if both element types are the same. - if (lhs.element_type() != rhs.element_type()) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { return fail("element types do not match"); } @@ -736,7 +744,8 @@ Status ValidateDotDimensionNumbers( dimensions.push_back(rhs.dimensions(i)); } } - Shape result = ShapeUtil::MakeShape(lhs.element_type(), dimensions); + Shape result = ShapeUtil::MakeShape( + ShapeUtil::HigherPrecisionElementType(lhs, rhs), dimensions); TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(result)); VLOG(2) << "inferred dot shape: " << ShapeUtil::HumanString(result); @@ -767,7 +776,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( ShapeUtil::HumanString(rhs).c_str()); } } - return ShapeUtil::MakeShape(lhs.element_type(), output_dimensions); + return ShapeUtil::MakeShape(ShapeUtil::HigherPrecisionElementType(lhs, rhs), + output_dimensions); } /* static */ StatusOr ShapeInference::InferInDimBroadcastShape( @@ -829,6 +839,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // specified in broadcast_dimensions are then changed to match the // corresponding dimension size in smaller_shape. Shape output_shape(larger_shape); + output_shape.set_element_type( + ShapeUtil::HigherPrecisionElementType(larger_shape, smaller_shape)); for (int i = 0; i < smaller_shape.dimensions_size(); ++i) { int64 dimension_to_match = broadcast_dimensions.at(i); @@ -878,7 +890,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_RETURN_IF_ERROR( ExpectNotTupleOrOpaque(rhs, "rhs of elementwise binary operation")); - if (!ShapeUtil::SameElementType(lhs, rhs)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { return InvalidArgument( "binary op %s with different element types: %s and %s", BinaryOperation_Name(operation).c_str(), @@ -897,10 +909,11 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } } - if (ShapeUtil::Compatible(lhs, rhs)) { + if (ShapeUtil::CompatibleIgnoringFpPrecision(lhs, rhs)) { // If the shapes are the same other than layout, the output shape is the // same (elementwise op). - return lhs; + return ShapeUtil::ChangeElementType( + lhs, ShapeUtil::HigherPrecisionElementType(lhs, rhs)); } if (ShapeUtil::Rank(lhs) == ShapeUtil::Rank(rhs)) { @@ -973,7 +986,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_ASSIGN_OR_RETURN(const Shape& shape, InferElementwiseBinaryOpShape(operation, lhs, rhs, broadcast_dimensions)); - if (lhs.element_type() == F32) { + if (lhs.element_type() == F32 && rhs.element_type() == F32) { return ShapeUtil::ChangeElementType(shape, C64); } else { return Unimplemented("complex component type not supported"); @@ -1078,12 +1091,13 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_RETURN_IF_ERROR( ExpectNotTupleOrOpaque(*arg_shapes[i], "operand of map")); - if (ShapeUtil::Compatible(*arg_shapes[i], *arg_shape)) { + if (ShapeUtil::CompatibleIgnoringFpPrecision(*arg_shapes[i], *arg_shape)) { continue; } if (!ShapeUtil::IsTuple(*arg_shapes[i]) && !ShapeUtil::IsTuple(*arg_shape) && - ShapeUtil::SameElementType(*arg_shapes[i], *arg_shape)) { + ShapeUtil::SameElementTypeIgnoringFpPrecision(*arg_shapes[i], + *arg_shape)) { if (ShapeUtil::IsScalar(*arg_shapes[i])) { continue; } @@ -1148,7 +1162,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( i, ShapeUtil::HumanString(parameter_shape).c_str()); } - if (parameter_shape.element_type() != arg_shape->element_type()) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(parameter_shape, + *arg_shape)) { return InvalidArgument( "mapped computation's parameter type has to match argument element " "type; got parameter %d shape: %s, argument shape: %s", @@ -1221,7 +1236,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( PrimitiveType_Name(operand_shape.element_type()).c_str()); } - if (!ShapeUtil::SameElementType(offset_shape, operand_shape)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(offset_shape, + operand_shape)) { return InvalidArgument( "The inputs should have the same element type for batch-norm-training, " "but the shape of offset factor is %s " @@ -1230,7 +1246,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( PrimitiveType_Name(operand_shape.element_type()).c_str()); } - if (!ShapeUtil::SameElementType(scale_shape, operand_shape)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(scale_shape, + operand_shape)) { return InvalidArgument( "The inputs should have the same element type for batch-norm-training, " "but the shape of scale factor is %s " @@ -1329,7 +1346,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( PrimitiveType_Name(operand_shape.element_type()).c_str()); } - if (!ShapeUtil::SameElementType(offset_shape, operand_shape)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(offset_shape, + operand_shape)) { return InvalidArgument( "The inputs should have the same element type for " "batch-norm-inference, " @@ -1339,7 +1357,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( PrimitiveType_Name(operand_shape.element_type()).c_str()); } - if (!ShapeUtil::SameElementType(scale_shape, operand_shape)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(scale_shape, + operand_shape)) { return InvalidArgument( "The inputs should have the same element type for " "batch-norm-inference, " @@ -1349,7 +1368,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( PrimitiveType_Name(operand_shape.element_type()).c_str()); } - if (!ShapeUtil::SameElementType(mean_shape, operand_shape)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(mean_shape, + operand_shape)) { return InvalidArgument( "The inputs should have the same element type for " "batch-norm-inference, " @@ -1359,7 +1379,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( PrimitiveType_Name(operand_shape.element_type()).c_str()); } - if (!ShapeUtil::SameElementType(variance_shape, operand_shape)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(variance_shape, + operand_shape)) { return InvalidArgument( "The inputs should have the same element type for " "batch-norm-inference, " @@ -1481,7 +1502,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( PrimitiveType_Name(output_grad_shape.element_type()).c_str()); } - if (!ShapeUtil::SameElementType(output_grad_shape, operand_shape)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(output_grad_shape, + operand_shape)) { return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but the element type of output_grad is %s " @@ -1490,7 +1512,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( PrimitiveType_Name(operand_shape.element_type()).c_str()); } - if (!ShapeUtil::SameElementType(scale_shape, operand_shape)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(scale_shape, + operand_shape)) { return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but the element type of scale factor is %s " @@ -1499,7 +1522,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( PrimitiveType_Name(operand_shape.element_type()).c_str()); } - if (!ShapeUtil::SameElementType(mean_shape, operand_shape)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(mean_shape, + operand_shape)) { return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but the element type of mean is %s " @@ -1508,7 +1532,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( PrimitiveType_Name(operand_shape.element_type()).c_str()); } - if (!ShapeUtil::SameElementType(var_shape, operand_shape)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(var_shape, + operand_shape)) { return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but the element type of mean is %s " @@ -1569,7 +1594,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(lhs, "lhs of convolution")); TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(rhs, "rhs of convolution")); - if (!ShapeUtil::SameElementType(lhs, rhs)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { return InvalidArgument( "Convolution with different element types: %s and %s", ShapeUtil::HumanString(lhs).c_str(), @@ -1714,8 +1739,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( dimensions[dnums.output_spatial_dimensions(i)] = window_output_shape.dimensions(i); } - - return ShapeUtil::MakeShape(lhs.element_type(), dimensions); + return ShapeUtil::MakeShape(ShapeUtil::HigherPrecisionElementType(lhs, rhs), + dimensions); } /* static */ StatusOr ShapeInference::InferFftShape( @@ -1877,16 +1902,16 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } const Shape& operand_element_shape = ShapeUtil::MakeShape(operand_shape.element_type(), {}); - if (!ShapeUtil::Compatible(operand_element_shape, - select_shape.parameters(0))) { + if (!ShapeUtil::CompatibleIgnoringFpPrecision(operand_element_shape, + select_shape.parameters(0))) { return InvalidArgument( "select function's first parameter shape currently must " "match the operand element shape. Got %s vs %s", ShapeUtil::HumanString(select_shape.parameters(0)).c_str(), ShapeUtil::HumanString(operand_element_shape).c_str()); } - if (!ShapeUtil::Compatible(operand_element_shape, - select_shape.parameters(1))) { + if (!ShapeUtil::CompatibleIgnoringFpPrecision(operand_element_shape, + select_shape.parameters(1))) { return InvalidArgument( "select function's second parameter shape currently must " "match the operand element shape. Got %s vs %s", @@ -1903,7 +1928,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( InferWindowOutputShape(operand_shape, window, operand_shape.element_type(), /*allow_negative_padding=*/false)); - if (!ShapeUtil::Compatible(source_shape, window_result_shape)) { + if (!ShapeUtil::CompatibleIgnoringFpPrecision(source_shape, + window_result_shape)) { return InvalidArgument( "source shape does not match the shape of window-reduced operand: " "source(%s), window-reduced operand(%s)", @@ -2086,7 +2112,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( ShapeUtil::Rank(update_shape), ShapeUtil::Rank(operand_shape)); } - if (operand_shape.element_type() != update_shape.element_type()) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(operand_shape, + update_shape)) { return InvalidArgument( "dynamic update slice update element type does not match argument. " "operand.element_type: %s vs update.element_type: %s", @@ -2322,24 +2349,26 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(min, "clamp min")); TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(operand, "clamp operand")); TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(max, "clamp max")); - if (!ShapeUtil::SameElementType(min, operand) || - !ShapeUtil::SameElementType(max, operand)) { + if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(min, operand) || + !ShapeUtil::SameElementTypeIgnoringFpPrecision(max, operand)) { return InvalidArgument("clamp op with different operand types: %s, %s, %s", ShapeUtil::HumanString(min).c_str(), ShapeUtil::HumanString(operand).c_str(), ShapeUtil::HumanString(max).c_str()); } - if (((ShapeUtil::Compatible(min, operand) || ShapeUtil::IsScalar(min)) && - (ShapeUtil::Compatible(max, operand) || ShapeUtil::IsScalar(max)))) { + if (((ShapeUtil::CompatibleIgnoringFpPrecision(min, operand) || + ShapeUtil::IsScalar(min)) && + (ShapeUtil::CompatibleIgnoringFpPrecision(max, operand) || + ShapeUtil::IsScalar(max)))) { return operand; } if (ShapeUtil::IsScalar(operand)) { - if (ShapeUtil::Compatible(min, max)) { - return min; + if (ShapeUtil::CompatibleIgnoringFpPrecision(min, max)) { + return ShapeUtil::ChangeElementType(min, operand.element_type()); } else if (ShapeUtil::IsScalar(min)) { - return max; + return ShapeUtil::ChangeElementType(max, operand.element_type()); } else if (ShapeUtil::IsScalar(max)) { - return min; + return ShapeUtil::ChangeElementType(min, operand.element_type()); } } return Unimplemented( @@ -2352,7 +2381,15 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // broadcast from all operands, not just the predicate. /* static */ StatusOr ShapeInference::InferSelectShape( const Shape& pred, const Shape& on_true, const Shape& on_false) { - if (!ShapeUtil::Compatible(on_true, on_false)) { + bool compatible; + if (ShapeUtil::IsTuple(on_true)) { + // Select only defines the top-level buffer, so if it's a tuple, the two + // input must match exactly. + compatible = ShapeUtil::Compatible(on_true, on_false); + } else { + compatible = ShapeUtil::CompatibleIgnoringFpPrecision(on_true, on_false); + } + if (!compatible) { return InvalidArgument( "operands to select must be the same shape; got %s and %s", ShapeUtil::HumanString(on_true).c_str(), @@ -2367,7 +2404,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // By this stage we know that pred's element type is PRED. Therefore, this // check restricts pred to be a PRED scalar, or a PRED array with the same // dimensions as on_true and on_false. - return on_true; + return ShapeUtil::ChangeElementType( + on_true, ShapeUtil::HigherPrecisionElementType(on_true, on_false)); } else { return Unimplemented( "select operation with non-scalar predicate with dimensionality " diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index d63e16ce2b..604e0173e7 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -630,6 +630,19 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { return SameDimensions(lhs, rhs); } +/* static */ bool ShapeUtil::CompatibleIgnoringFpPrecision(const Shape& lhs, + const Shape& rhs) { + if (lhs.element_type() == TUPLE) { + return rhs.element_type() == TUPLE && + ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), + CompatibleIgnoringFpPrecision); + } + if (SameElementTypeIgnoringFpPrecision(lhs, rhs)) { + return CompatibleIgnoringElementType(lhs, rhs); + } + return false; +} + /* static */ int64 ShapeUtil::GetDimension(const Shape& shape, int64 dimension_number) { return shape.dimensions(GetDimensionNumber(shape, dimension_number)); diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 453d4ec047..d8a00880e9 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -23,6 +23,7 @@ limitations under the License. #include #include "tensorflow/compiler/xla/layout_util.h" +#include "tensorflow/compiler/xla/primitive_util.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -211,6 +212,31 @@ class ShapeUtil { return lhs.element_type() == rhs.element_type(); } + // As SameElementType, but allows floating point types to have different + // precisions. + static bool SameElementTypeIgnoringFpPrecision(const Shape& a, + const Shape& b) { + if (ElementIsFloating(a) && ElementIsFloating(b)) { + return true; + } + return ShapeUtil::SameElementType(a, b); + } + + // Returns the higher-precision element type if a and b are both floating + // point types; otherwise, checks that that they have the same element type + // and returns it. + static PrimitiveType HigherPrecisionElementType(const Shape& a, + const Shape& b) { + if (SameElementType(a, b)) { + return a.element_type(); + } + CHECK(SameElementTypeIgnoringFpPrecision(a, b)); + return primitive_util::BitWidth(a.element_type()) < + primitive_util::BitWidth(b.element_type()) + ? b.element_type() + : a.element_type(); + } + // Returns true if the rank, dimension sizes, and element type are // identical. Layout is ignored. Tuple elements are compared recursively for // compatibility. @@ -221,6 +247,10 @@ class ShapeUtil { // compatibility. static bool CompatibleIgnoringElementType(const Shape& lhs, const Shape& rhs); + // As Compatible, but allow one of lhs and rhs to be BF16 while the other + // being F32. Tuple elements are compared recursively for compatibility. + static bool CompatibleIgnoringFpPrecision(const Shape& lhs, const Shape& rhs); + // Returns whether the lhs and rhs shapes are identical protobufs. static bool Equal(const Shape& lhs, const Shape& rhs); diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc index 81ba7afb95..4db97d45b2 100644 --- a/tensorflow/compiler/xla/shape_util_test.cc +++ b/tensorflow/compiler/xla/shape_util_test.cc @@ -170,6 +170,18 @@ TEST(ShapeUtilTest, CompatibleNotIdenticalShapes) { EXPECT_TRUE(ShapeUtil::Compatible(shape_1, shape_2)); } +TEST(ShapeUtilTest, CompatibleIgnoringFpPrecision) { + Shape shape1 = ShapeUtil::MakeShape(BF16, {3, 2}); + Shape shape2 = ShapeUtil::MakeShape(F32, {3, 2}); + ASSERT_TRUE(ShapeUtil::CompatibleIgnoringFpPrecision(shape1, shape2)); +} + +TEST(ShapeUtilTest, IncompatibleIgnoringFpPrecision) { + Shape shape1 = ShapeUtil::MakeShape(BF16, {3, 2}); + Shape shape2 = ShapeUtil::MakeShape(F32, {2, 2}); + ASSERT_FALSE(ShapeUtil::CompatibleIgnoringFpPrecision(shape1, shape2)); +} + TEST(ShapeUtilTest, IncompatibleDifferentElementShapes) { Shape shape_1 = ShapeUtil::MakeShape(F32, {3, 2}); Shape shape_2 = ShapeUtil::MakeShape(PRED, {3, 2}); @@ -184,6 +196,14 @@ TEST(ShapeUtilTest, CompatibleTuples) { EXPECT_TRUE(ShapeUtil::Compatible(tuple1, tuple2)); } +TEST(ShapeUtilTest, CompatibleTuplesIgnoringFpPrecision) { + Shape tuple1 = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(BF16, {3, 2}), ShapeUtil::MakeShape(F32, {4, 5})}); + Shape tuple2 = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F64, {3, 2}), ShapeUtil::MakeShape(BF16, {4, 5})}); + EXPECT_TRUE(ShapeUtil::CompatibleIgnoringFpPrecision(tuple1, tuple2)); +} + TEST(ShapeUtilTest, IncompatibleTuplesWithSwappedElements) { Shape tuple1 = ShapeUtil::MakeTupleShape( {ShapeUtil::MakeShape(PRED, {4, 5}), ShapeUtil::MakeShape(F32, {3, 2})}); @@ -193,6 +213,14 @@ TEST(ShapeUtilTest, IncompatibleTuplesWithSwappedElements) { EXPECT_FALSE(ShapeUtil::CompatibleIgnoringElementType(tuple1, tuple2)); } +TEST(ShapeUtilTest, IncompatibleTuplesIgnoringFpPrecision) { + Shape tuple1 = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(BF16, {4, 5}), ShapeUtil::MakeShape(F32, {3, 2})}); + Shape tuple2 = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F32, {3, 2}), ShapeUtil::MakeShape(BF16, {4, 5})}); + EXPECT_FALSE(ShapeUtil::CompatibleIgnoringFpPrecision(tuple1, tuple2)); +} + TEST(ShapeUtilTest, IncompatibleTuplesWithDifferentPrimitiveType) { Shape tuple1 = ShapeUtil::MakeTupleShape( {ShapeUtil::MakeShape(PRED, {4, 5}), ShapeUtil::MakeShape(F32, {3, 2})}); -- GitLab From c7e1de032658f7256e53b8a5a6066b140ecb9615 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Feb 2018 11:49:54 -0800 Subject: [PATCH 069/629] Adding support for tf.reduce_sum with keep_dims=True. PiperOrigin-RevId: 185411141 --- .../toco/graph_transformations/resolve_constant_unary.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc index 1cd2aff28c..f227554bc5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc @@ -139,14 +139,13 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { output_buffer_size * sizeof(output_float_data[0])); } else if (unary_op->type == OperatorType::kTensorFlowSum) { // At the moment only full reduction across all dimensions is supported. - for (int i = 0; i < output_dims_count; i++) { - CHECK_EQ(output_shape.dims(i), 1); - } float sum = 0.f; for (int i = 0; i < input_buffer_size; i++) { sum += (*input_float_data)[i]; } - output_float_data[0] = sum; + for (int i = 0; i < output_buffer_size; ++i) { + output_float_data[i] = sum; + } } else if (unary_op->type == OperatorType::kTensorFlowMin) { // At the moment only full reduction across all dimensions is supported. // TODO(starka): Output should not be padded. -- GitLab From 9b703c34bb0124af75cb03c7a81251595f5e849b Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Mon, 12 Feb 2018 11:54:07 -0800 Subject: [PATCH 070/629] Support reduction with true keep_dims and squeeze along NHW dimensions. PiperOrigin-RevId: 185411786 --- .../grappler/optimizers/layout_optimizer.cc | 59 +++++++----- .../python/grappler/layout_optimizer_test.py | 90 +++++++++++++++++++ 2 files changed, 129 insertions(+), 20 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index 5a62b77327..a606f972ac 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -1717,13 +1717,28 @@ class SqueezeProcessor : public AgnosticNodeProcessor { protected: bool ShouldProcess() const override { - return !MustPreserve() && IsPortZeroDimsN(*node_, 2) && HasOutputs() && - IsNodeAfterNCHWToNHWC() && IsInputConvertible() && IsAlongDimHW() && - IsOnGPU(); + bool is_dims_supported = (IsPortZeroDimsN(*node_, 2) && IsAlongHW()) || + (IsPortZeroDimsN(*node_, 1) && IsAlongNHW()); + return !MustPreserve() && HasOutputs() && IsNodeAfterNCHWToNHWC() && + IsInputConvertible() && is_dims_supported && IsOnGPU(); } Status AddLayoutTransposeToOutputs() override { return Status::OK(); } + Status CustomizedProcessing() override { + TF_RETURN_IF_ERROR(HasAttribute(*node_, "squeeze_dims")); + auto list = node_->mutable_attr()->at("squeeze_dims").mutable_list(); + if (list->i_size() == 2) { + list->set_i(0, 2); + list->set_i(1, 3); + } else if (list->i_size() == 3) { + list->set_i(1, 2); + list->set_i(2, 3); + } + return Status::OK(); + } + + private: bool IsInputConvertible() const { int input_port; auto input = node_map_->GetNode(node_->input(0)); @@ -1736,33 +1751,31 @@ class SqueezeProcessor : public AgnosticNodeProcessor { if (shape.dim(1).size() == 1 && shape.dim(2).size() == 1) { return true; } + if (shape.dim(0).size() == 1 && shape.dim(1).size() == 1 && + shape.dim(2).size() == 1) { + return true; + } } return false; } - bool IsAlongDimHW() const { + bool IsAlongAxis(const std::vector& axis) const { if (node_->attr().find("squeeze_dims") != node_->attr().end()) { auto list = node_->attr().at("squeeze_dims").list(); // If list is empty, Squeeze op will squeeze all dimensions of size 1. if (list.i_size() == 0) return true; - if (list.i_size() == 2) { - if (list.i(0) == 1 && list.i(1) == 2) { - return true; + if (list.i_size() == axis.size()) { + bool along_axis = true; + for (int i = 0; i < axis.size(); i++) { + along_axis = along_axis && (list.i(i) == axis[i]); } + if (along_axis) return true; } } return false; } - - Status CustomizedProcessing() override { - TF_RETURN_IF_ERROR(HasAttribute(*node_, "squeeze_dims")); - auto list = node_->mutable_attr()->at("squeeze_dims").mutable_list(); - if (list->i_size() == 2) { - list->set_i(0, 2); - list->set_i(1, 3); - } - return Status::OK(); - } + bool IsAlongHW() const { return IsAlongAxis({1, 2}); } + bool IsAlongNHW() const { return IsAlongAxis({0, 1, 2}); } }; class ReduceProcessor : public AgnosticNodeProcessor { @@ -1789,12 +1802,18 @@ class ReduceProcessor : public AgnosticNodeProcessor { return Status::OK(); } - Status AddLayoutTransposeToOutputs() override { return Status::OK(); } + Status AddLayoutTransposeToOutputs() override { + if (KeepDims()) { + return AddTransformToOutputs("Transpose"); + } + return Status::OK(); + } private: bool IsReduceAxisSupported() const { - return IsAlongAllFourDims() || IsAlongHWC() || - ((IsAlongNHW() || IsAlongHW() || IsAlongC()) && !KeepDims()); + return KeepDims() || ((IsAlongAllFourDims() || IsAlongHWC() || + IsAlongNHW() || IsAlongHW() || IsAlongC()) && + !KeepDims()); } bool IsAlongAxis(const std::vector& axis) const { diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 30dcdf31aa..b04bbb0daa 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -471,6 +471,66 @@ class LayoutOptimizerTest(test.TestCase): self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) + def testSqueezeAlongHW(self): + if test.is_gpu_available(cuda_only=True): + random_seed.set_random_seed(0) + x = random_ops.truncated_normal([1, 784], seed=0) + conv = _two_layer_model(x) + reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2], keep_dims=True) + squeeze = array_ops.squeeze(reduce_sum, axis=[1, 2]) + output = array_ops.identity(squeeze) + + with session.Session() as sess: + output_val_ref = sess.run(output) + + with session.Session(config=_get_config()) as sess: + metadata = config_pb2.RunMetadata() + output_val = sess.run(output, run_metadata=metadata) + + nodes = [] + num_transposes = 0 + for node in metadata.cost_graph.node: + if _is_transpose(node.name): + num_transposes += 1 + nodes.append(node.name) + + # Three transposes were initially added in the Expand phase of + # LayoutOptimizer; two of them are cancelled out in the Collapse phase. + expected_num_transposes = 1 + self.assertEqual(expected_num_transposes, num_transposes) + self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) + self.assertAllClose(output_val_ref, output_val, atol=1e-3) + + def testSqueezeAlongNHW(self): + if test.is_gpu_available(cuda_only=True): + random_seed.set_random_seed(0) + x = random_ops.truncated_normal([1, 784], seed=0) + conv = _two_layer_model(x) + reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2], keep_dims=True) + squeeze = array_ops.squeeze(reduce_sum, axis=[0, 1, 2]) + output = array_ops.identity(squeeze) + + with session.Session() as sess: + output_val_ref = sess.run(output) + + with session.Session(config=_get_config()) as sess: + metadata = config_pb2.RunMetadata() + output_val = sess.run(output, run_metadata=metadata) + + nodes = [] + num_transposes = 0 + for node in metadata.cost_graph.node: + if _is_transpose(node.name): + num_transposes += 1 + nodes.append(node.name) + + # Three transposes were initially added in the Expand phase of + # LayoutOptimizer; two of them are cancelled out in the Collapse phase. + expected_num_transposes = 1 + self.assertEqual(expected_num_transposes, num_transposes) + self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) + self.assertAllClose(output_val_ref, output_val, atol=1e-3) + def testReduceSumAlongHWC(self): if test.is_gpu_available(cuda_only=True): random_seed.set_random_seed(0) @@ -558,6 +618,36 @@ class LayoutOptimizerTest(test.TestCase): self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) + def testReduceSumAlongCKeepDims(self): + if test.is_gpu_available(cuda_only=True): + random_seed.set_random_seed(0) + x = random_ops.truncated_normal([1, 784], seed=0) + conv = _two_layer_model(x) + reduce_sum = math_ops.reduce_sum(conv, axis=[3], keep_dims=True) + output = array_ops.identity(reduce_sum) + + with session.Session() as sess: + output_val_ref = sess.run(output) + + with session.Session(config=_get_config()) as sess: + metadata = config_pb2.RunMetadata() + output_val = sess.run(output, run_metadata=metadata) + + nodes = [] + num_transposes = 0 + for node in metadata.cost_graph.node: + if _is_transpose(node.name): + num_transposes += 1 + nodes.append(node.name) + + # Four transposes were initially added in the Expand phase of + # LayoutOptimizer; two of them are cancelled out in the Collapse phase. + expected_num_transposes = 2 + self.assertEqual(expected_num_transposes, num_transposes) + self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) + self._assert_trans_nchw_to_nhwc('Sum-0-0', nodes) + self.assertAllClose(output_val_ref, output_val, atol=1e-3) + def testConcatWithControlDependency(self): if test.is_gpu_available(cuda_only=True): random_seed.set_random_seed(0) -- GitLab From 00f355df5cb34817139dfb715e1277a6be17998e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 12 Feb 2018 12:05:49 -0800 Subject: [PATCH 071/629] Enable the use of scheduling heuristics to reduce peak memory usage by default PiperOrigin-RevId: 185413855 --- .../core/grappler/costs/virtual_scheduler.cc | 12 +++++++----- .../core/grappler/optimizers/memory_optimizer.cc | 16 +++++++++++++--- .../core/grappler/optimizers/meta_optimizer.cc | 9 +++++---- .../core/grappler/optimizers/model_pruner.cc | 2 +- tensorflow/core/protobuf/rewriter_config.proto | 2 +- 5 files changed, 27 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 020492a3e9..14b4ed7507 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/core/grappler/costs/utils.h" #include "tensorflow/core/grappler/op_types.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/logging.h" @@ -446,13 +447,14 @@ Status VirtualScheduler::Init() { } if (ready_nodes_->Empty()) { - return Status(error::UNAVAILABLE, "No ready nodes in the graph."); + return errors::InvalidArgument("No ready nodes in the graph."); } - if (!feed_nodes.empty()) - LOG(ERROR) << "Some feed nodes were not found in the graph: " - << str_util::Join(feed_nodes, ","); - + if (!feed_nodes.empty()) { + return errors::InvalidArgument( + strings::StrCat("Some feed nodes were not found in the graph: ", + str_util::Join(feed_nodes, ","))); + } initialized_ = true; return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index 9f3e94052f..ef178adc14 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -511,6 +511,10 @@ bool SchedulingPass(Cluster* cluster, GrapplerItem* item) { } } + if (addn_list.empty()) { + return false; + } + GraphMemory memory(*item); const std::unordered_map& devices = cluster->GetDevices(); @@ -560,6 +564,13 @@ bool SchedulingPass(Cluster* cluster, GrapplerItem* item) { VLOG(1) << "Missing properties for " << node->name(); continue; } + const TensorShapeProto& shape = + properties.GetOutputProperties(node->name())[0].shape(); + PartialTensorShape shp(shape); + if (!shp.IsFullyDefined()) { + VLOG(1) << "Shape not fully known for " << node->name(); + continue; + } // Compute a topological ordering for the node fanin. std::unordered_map topo_order; @@ -608,8 +619,6 @@ bool SchedulingPass(Cluster* cluster, GrapplerItem* item) { } } - const TensorShapeProto& shape = - properties.GetOutputProperties(node->name())[0].shape(); DataType dtype = node->attr().at("T").type(); const string& device = node->device(); @@ -1223,7 +1232,8 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, bool updated_graph = true; for (int i = 0; i < 25 && updated_graph; ++i) { updated_graph = false; - if ((optimization_level_ == RewriterConfig::SCHEDULING_HEURISTICS || + if ((optimization_level_ == RewriterConfig::DEFAULT_MEM_OPT || + optimization_level_ == RewriterConfig::SCHEDULING_HEURISTICS || optimization_level_ == RewriterConfig::HEURISTICS) && cluster != nullptr) { updated_graph |= SchedulingPass(cluster, &optimized_item); diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 6d93f74186..ab7e05dd5d 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -101,7 +101,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back( std::unique_ptr(new LayoutOptimizer())); } - if (cfg_.memory_optimization() > 1) { + if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { if (cfg_.memory_optimizer_target_node_name_prefix().empty()) { optimizers.push_back(std::unique_ptr( // Use the default target node name prefix "gradients/" @@ -136,7 +136,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, bool already_optimized = false; for (const auto& optimizer : optimizers) { if (!already_optimized) { - auto status = optimizer->Optimize(cluster, item, optimized_graph); + Status status = optimizer->Optimize(cluster, item, optimized_graph); string result; if (!status.ok()) { VLOG(1) << "Not able to apply optimizer " << optimizer->name() @@ -152,7 +152,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, << " return status: " << result; } else { GrapplerItem optimized_item(item, std::move(*optimized_graph)); - auto status = + Status status = optimizer->Optimize(cluster, optimized_item, optimized_graph); string result; if (!status.ok()) { @@ -205,7 +205,8 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { cfg.constant_folding() != RewriterConfig::OFF || cfg.dependency_optimization() != RewriterConfig::OFF || cfg.arithmetic_optimization() != RewriterConfig::OFF || - cfg.auto_parallel().enable() || cfg.memory_optimization() > 1 || + cfg.auto_parallel().enable() || + cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT || !cfg.optimizers().empty(); } diff --git a/tensorflow/core/grappler/optimizers/model_pruner.cc b/tensorflow/core/grappler/optimizers/model_pruner.cc index ece9df012e..f52a2ab862 100644 --- a/tensorflow/core/grappler/optimizers/model_pruner.cc +++ b/tensorflow/core/grappler/optimizers/model_pruner.cc @@ -67,7 +67,7 @@ Status ModelPruner::Optimize(Cluster* cluster, const GrapplerItem& item, // let's be conservative and preserve the graph as is. return errors::InvalidArgument("Invalid input graph."); } - // Try to keep the nodes ordored somewhat topologically since this helps + // Try to keep the nodes ordered somewhat topologically since this helps // further optimizations perform better. for (int i = keep.size() - 1; i >= 0; --i) { *runnable_item.graph.add_node() = *keep[i]; diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index dddadceeb5..77667e4358 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -41,7 +41,7 @@ message RewriterConfig { bool disable_model_pruning = 2; enum MemOptType { - // The default setting (currently disabled) + // The default setting (SCHEDULING_HEURISTICS only) DEFAULT_MEM_OPT = 0; // Disabled in the meta-optimizer. NO_MEM_OPT = 1; -- GitLab From 7f743078338101b8a4400813f0545a7757959f34 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Mon, 12 Feb 2018 12:13:15 -0800 Subject: [PATCH 072/629] Sort the dependency --- tensorflow/contrib/tensor_forest/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 6f181288d5..1e4cc3f095 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -497,8 +497,8 @@ py_library( ":tensor_forest_v4_ops_py", "//tensorflow/contrib/decision_trees/proto:generic_tree_model_py", "//tensorflow/contrib/framework:framework_py", - "//tensorflow/contrib/tensor_forest/proto:tensor_forest_params_proto_py", "//tensorflow/contrib/tensor_forest/proto:fertile_stats_proto_py", + "//tensorflow/contrib/tensor_forest/proto:tensor_forest_params_proto_py", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_for_generated_wrappers", -- GitLab From 957c88853fb3512218939e3c9170aeaac33be044 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 12 Feb 2018 12:09:26 -0800 Subject: [PATCH 073/629] [Java]: Add link to samples in the tensorflow/models repository. PiperOrigin-RevId: 185414475 --- .../java/src/main/java/org/tensorflow/package-info.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/java/src/main/java/org/tensorflow/package-info.java b/tensorflow/java/src/main/java/org/tensorflow/package-info.java index dd4859e1b1..521c5c610c 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/package-info.java +++ b/tensorflow/java/src/main/java/org/tensorflow/package-info.java @@ -35,5 +35,9 @@ limitations under the License. *
  • Graph execution: Using a Session to execute the graphs and find the best label for an * image. * + * + *

    Additional examples can be found in the tensorflow/models + * GitHub repository. */ package org.tensorflow; -- GitLab From f3ebb0b165563192bd80f5dd34a04cd0ac37ff6f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Feb 2018 12:23:16 -0800 Subject: [PATCH 074/629] Add yield_single_examples arg to Estimator.predict PiperOrigin-RevId: 185416396 --- tensorflow/python/estimator/estimator.py | 14 ++++++++++--- tensorflow/python/estimator/estimator_test.py | 21 +++++++++++++++++++ ...rflow.estimator.-baseline-classifier.pbtxt | 2 +- ...orflow.estimator.-baseline-regressor.pbtxt | 2 +- ...nsorflow.estimator.-d-n-n-classifier.pbtxt | 2 +- ...or.-d-n-n-linear-combined-classifier.pbtxt | 2 +- ...tor.-d-n-n-linear-combined-regressor.pbtxt | 2 +- ...ensorflow.estimator.-d-n-n-regressor.pbtxt | 2 +- .../tensorflow.estimator.-estimator.pbtxt | 2 +- ...sorflow.estimator.-linear-classifier.pbtxt | 2 +- ...nsorflow.estimator.-linear-regressor.pbtxt | 2 +- 11 files changed, 41 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 7bf838e5a0..e269b71f2e 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -427,7 +427,8 @@ class Estimator(object): input_fn, predict_keys=None, hooks=None, - checkpoint_path=None): + checkpoint_path=None, + yield_single_examples=True): """Yields predictions for given features. Args: @@ -453,13 +454,18 @@ class Estimator(object): inside the prediction call. checkpoint_path: Path of a specific checkpoint to predict. If `None`, the latest checkpoint in `model_dir` is used. + yield_single_examples: If False, yield the whole batch as returned by the + model_fn instead of decomposing the batch into individual elements. This + is useful if model_fn return some tensor with first dimension not + equal to the batch size Yields: Evaluated values of `predictions` tensors. Raises: ValueError: Could not find a trained model in model_dir. - ValueError: if batch length of predictions are not same. + ValueError: if batch length of predictions are not same and + yield_single_examples is True. ValueError: If there is a conflict between `predict_keys` and `predictions`. For example if `predict_keys` is not `None` but `EstimatorSpec.predictions` is not a `dict`. @@ -492,7 +498,9 @@ class Estimator(object): hooks=all_hooks) as mon_sess: while not mon_sess.should_stop(): preds_evaluated = mon_sess.run(predictions) - if not isinstance(predictions, dict): + if not yield_single_examples: + yield preds_evaluated + elif not isinstance(predictions, dict): for pred in preds_evaluated: yield pred else: diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 39a5b998eb..7c7d913c32 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -1472,6 +1472,27 @@ class EstimatorPredictTest(test.TestCase): 'Batch length of predictions should be same'): next(est.predict(dummy_input_fn)) + def test_iterate_batches(self): + + def _model_fn(features, labels, mode): + _, _ = features, labels + return model_fn_lib.EstimatorSpec( + mode, + loss=constant_op.constant(0.), + train_op=state_ops.assign_add(training.get_global_step(), 1), + predictions={ + # First dim is different but the prediction should still work + 'y1': array_ops.zeros(shape=[3]), + 'y2': array_ops.zeros(shape=[5, 3]) + }) + + est = estimator.Estimator(model_fn=_model_fn) + est.train(dummy_input_fn, steps=1) + + predictions = next(est.predict(dummy_input_fn, yield_single_examples=False)) + self.assertAllEqual(predictions['y1'].shape, [3]) + self.assertAllEqual(predictions['y2'].shape, [5, 3]) + def test_predict_keys_defined_for_tensor(self): def _model_fn(features, labels, mode): diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt index 874a73f661..be9ba4ce85 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-classifier.pbtxt @@ -45,7 +45,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], " } member_method { name: "train" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt index 8da2a2b686..91fca67b6b 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-baseline-regressor.pbtxt @@ -45,7 +45,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], " } member_method { name: "train" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt index efc441ae2f..cd4f72fcf8 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-classifier.pbtxt @@ -45,7 +45,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], " } member_method { name: "train" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt index 20ce879870..303fd74a64 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-classifier.pbtxt @@ -45,7 +45,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], " } member_method { name: "train" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt index 73211aaf8b..c97ea7969e 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-linear-combined-regressor.pbtxt @@ -45,7 +45,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], " } member_method { name: "train" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt index 27a159639d..4b5b5bf0e3 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-d-n-n-regressor.pbtxt @@ -45,7 +45,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], " } member_method { name: "train" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt index 76f527f796..42a0d59521 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-estimator.pbtxt @@ -44,7 +44,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], " } member_method { name: "train" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt index c45318b98a..2de52d6c57 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-classifier.pbtxt @@ -45,7 +45,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], " } member_method { name: "train" diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt index 04a2aa080d..e552f33720 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-linear-regressor.pbtxt @@ -45,7 +45,7 @@ tf_class { } member_method { name: "predict" - argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'input_fn\', \'predict_keys\', \'hooks\', \'checkpoint_path\', \'yield_single_examples\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\'], " } member_method { name: "train" -- GitLab From c39695dbbce879e3c31d38780a93d81ab99461cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Feb 2018 12:29:05 -0800 Subject: [PATCH 075/629] Add missing feature in make_parse_example_spec documentation. PiperOrigin-RevId: 185417163 --- tensorflow/python/feature_column/feature_column.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 52e42ef018..c416881c31 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -512,6 +512,7 @@ def make_parse_example_spec(feature_columns): ```python # Define features and transformations + feature_a = categorical_column_with_vocabulary_file(...) feature_b = numeric_column(...) feature_c_bucketized = bucketized_column(numeric_column("feature_c"), ...) feature_a_x_feature_c = crossed_column( -- GitLab From 91f3ee3633d3979a9c51a3c3353a8059a5b4436f Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 12 Feb 2018 12:30:01 -0800 Subject: [PATCH 076/629] [TF:XLA] Bump open source llvm revision to r324889 PiperOrigin-RevId: 185417275 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index d0f9a8925f..14a4281fae 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -473,11 +473,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/562d4e516ab92302b34b7f4c8833455699bb48de.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/562d4e516ab92302b34b7f4c8833455699bb48de.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/14498370e4dd4de99369b6129328ffcff737fc97.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/14498370e4dd4de99369b6129328ffcff737fc97.tar.gz", ], - sha256 = "cd041cda90f2e29fd3053f3faca182ad7ed871045d789c339d0f7c7d25310ef2", - strip_prefix = "llvm-562d4e516ab92302b34b7f4c8833455699bb48de", + sha256 = "ac8033652d5813f5454bcd32b9530169c1a8a523366e1e76315b319c99a91c83", + strip_prefix = "llvm-14498370e4dd4de99369b6129328ffcff737fc97", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From 6841ada9bcd228f4a24c9c7909cec2e85c4bc0df Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 12 Feb 2018 12:50:07 -0800 Subject: [PATCH 077/629] Also add quantization step node to MODEL_VARIABLES collection. PiperOrigin-RevId: 185420228 --- tensorflow/contrib/quantize/python/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/quantize/python/common.py b/tensorflow/contrib/quantize/python/common.py index 3a1fa61e43..9e76549ba5 100644 --- a/tensorflow/contrib/quantize/python/common.py +++ b/tensorflow/contrib/quantize/python/common.py @@ -114,7 +114,9 @@ def CreateOrGetQuantizationStep(): dtype=dtypes.int64, initializer=init_ops.zeros_initializer(), trainable=False, - collections=[ops.GraphKeys.GLOBAL_VARIABLES]) + collections=[ + ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.MODEL_VARIABLES + ]) with g.name_scope(quantization_step_tensor.op.name + '/'): # We return the incremented variable tensor. Since this is used in conds # for quant_delay and freeze_bn_delay, it will run once per graph -- GitLab From 1eca578242eda2db93cdb2509413996e9294751e Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 5 Feb 2018 13:43:52 -0800 Subject: [PATCH 078/629] [tf.data] Fix use-after-free bug when closing down an input pipeline. This fix affects the distributed runtime; DirectSession use is unaffected. Before this change, an iterator that used a background prefetching thread might attempt to use a captured FunctionLibraryRuntime from a subgraph that had been deregistered (and hence its FunctionLibraryRuntime would have been deleted). This change introduces a mechanism for "cloning" the necessary parts of the FunctionLibraryRuntime so that it can be owned by the IteratorResource. PiperOrigin-RevId: 184579490 --- tensorflow/core/common_runtime/function.cc | 19 ++++++++++++ .../core/common_runtime/graph_optimizer.h | 2 ++ .../process_function_library_runtime.cc | 12 ++++++++ .../process_function_library_runtime.h | 6 ++++ tensorflow/core/framework/function.h | 5 ++++ tensorflow/core/kernels/data/iterator_ops.cc | 7 +++-- .../kernel_tests/iterator_ops_cluster_test.py | 30 +++++++++++++++++++ 7 files changed, 79 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index 150fb85c70..248ff9051b 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -183,6 +183,10 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime { string DebugString(Handle h) override; + Status Clone(std::unique_ptr* out_lib_def, + std::unique_ptr* out_pflr, + FunctionLibraryRuntime** out_flr) override; + private: typedef FunctionLibraryRuntimeImpl ME; @@ -895,6 +899,21 @@ string FunctionLibraryRuntimeImpl::DebugString(Handle handle) { } } +Status FunctionLibraryRuntimeImpl::Clone( + std::unique_ptr* out_lib_def, + std::unique_ptr* out_pflr, + FunctionLibraryRuntime** out_flr) { + TF_RETURN_IF_ERROR( + parent_->Clone(env_, graph_def_version_, optimizer_.options(), + custom_kernel_creator_, out_lib_def, out_pflr)); + *out_flr = (*out_pflr)->GetFLR(device_->name()); + if (out_flr != nullptr) { + return Status::OK(); + } else { + return errors::Internal("Cloning FunctionLibraryRuntime failed."); + } +} + namespace { struct CustomCreatorSingleton { diff --git a/tensorflow/core/common_runtime/graph_optimizer.h b/tensorflow/core/common_runtime/graph_optimizer.h index 8477cea126..80246281cd 100644 --- a/tensorflow/core/common_runtime/graph_optimizer.h +++ b/tensorflow/core/common_runtime/graph_optimizer.h @@ -52,6 +52,8 @@ class GraphOptimizer { shape_map, const std::function& cse_consider_fn = nullptr); + const OptimizerOptions& options() { return opts_; } + private: OptimizerOptions opts_; diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index dd4bf6a345..41e1ce8c15 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -350,4 +350,16 @@ void ProcessFunctionLibraryRuntime::Run( done(errors::Internal("Could not find device")); } +Status ProcessFunctionLibraryRuntime::Clone( + Env* env, int graph_def_version, const OptimizerOptions& optimizer_options, + CustomKernelCreator custom_kernel_creator, + std::unique_ptr* out_lib_def, + std::unique_ptr* out_pflr) { + out_lib_def->reset(new FunctionLibraryDefinition(*lib_def_)); + out_pflr->reset(new ProcessFunctionLibraryRuntime( + device_mgr_, env, graph_def_version, out_lib_def->get(), + optimizer_options, std::move(custom_kernel_creator), parent_)); + return Status::OK(); +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.h b/tensorflow/core/common_runtime/process_function_library_runtime.h index 9c9c92f1ea..4296f9449f 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.h +++ b/tensorflow/core/common_runtime/process_function_library_runtime.h @@ -145,6 +145,12 @@ class ProcessFunctionLibraryRuntime { // Removes handle from the state owned by this object. Status RemoveHandle(FunctionLibraryRuntime::Handle handle); + Status Clone(Env* env, int graph_def_version, + const OptimizerOptions& optimizer_options, + CustomKernelCreator custom_kernel_creator, + std::unique_ptr* out_lib_def, + std::unique_ptr* out_pflr); + friend class FunctionLibraryRuntimeImpl; mutable mutex mu_; diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h index b933ee0b0e..7498cde637 100644 --- a/tensorflow/core/framework/function.h +++ b/tensorflow/core/framework/function.h @@ -35,6 +35,7 @@ namespace tensorflow { class CancellationManager; class GraphDef; class OpKernel; +class ProcessFunctionLibraryRuntime; class ResourceMgr; class Rendezvous; class ScopedStepContainer; @@ -534,6 +535,10 @@ class FunctionLibraryRuntime { virtual int graph_def_version() = 0; typedef uint64 LocalHandle; + + virtual Status Clone(std::unique_ptr* out_lib_def, + std::unique_ptr* out_pflr, + FunctionLibraryRuntime** out_flr) = 0; }; // Returns a canonicalized string for the instantiation of the diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index dd5f4a4554..8a420ac26d 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -459,7 +459,7 @@ class IteratorHandleOp : public OpKernel { { mutex_lock l(mu_); if (resource_ == nullptr) { - FunctionLibraryRuntime* lib = context->function_library(); + FunctionLibraryRuntime* lib; std::unique_ptr device_mgr(nullptr); std::unique_ptr flib_def(nullptr); std::unique_ptr pflr(nullptr); @@ -469,6 +469,9 @@ class IteratorHandleOp : public OpKernel { // is sufficient demand, but it will require a significant refactoring. if (!name_.empty()) { lib = CreatePrivateFLR(context, &device_mgr, &flib_def, &pflr); + } else { + OP_REQUIRES_OK(context, context->function_library()->Clone( + &flib_def, &pflr, &lib)); } ResourceMgr* mgr = context->resource_manager(); @@ -538,7 +541,7 @@ class IteratorHandleOp : public OpKernel { // Wrap the existing device in order to see any captured resources // in its resource manager. The existing device will outlive the // IteratorResource, because we are storing the IteratorResource - // in that device's resourc manager. + // in that device's resource manager. Device* wrapped_device = RenamedDevice::NewRenamedDevice( ctx->device()->name(), down_cast(ctx->device()), false /* owns_underlying */, false /* isolate_session_state */); diff --git a/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py b/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py index 2c65c49ebd..25c91b42dc 100644 --- a/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py +++ b/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py @@ -17,6 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops @@ -30,6 +32,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops from tensorflow.python.ops import lookup_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import string_ops from tensorflow.python.platform import test @@ -140,6 +143,33 @@ class IteratorClusterTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testImplicitDisposeParallelMapDataset(self): + # Tests whether a parallel map dataset will be cleaned up correctly when + # the pipeline does not run it until exhaustion. + # The pipeline is TensorSliceDataset -> MapDataset(square_3) -> + # RepeatDataset(None) -> PrefetchDataset(100). + worker, _ = test_util.create_local_cluster(1, 1) + + components = (np.arange(1000), + np.array([[1, 2, 3]]) * np.arange(1000)[:, np.newaxis], + np.array(37.0) * np.arange(1000)) + + def _map_fn(x, y, z): + return math_ops.square(x), math_ops.square(y), math_ops.square(z) + + dataset = ( + dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) + .repeat(None).prefetch(10000)) + + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with session.Session(worker[0].target) as sess: + sess.run(init_op) + for _ in range(3): + sess.run(get_next) + if __name__ == "__main__": test.main() -- GitLab From d4ad85b765f5945bf35a256f0dd2e9a5278de3e5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Feb 2018 09:40:53 -0800 Subject: [PATCH 079/629] Fix bug in and speed up ConstantFolding::CreateNodeDef(): * Fix bug trying to store more than kintmax32 values in a repeated proto field. * Speed up populating compressed format. Example: tensorflow/python/kernel_tests/large_concat_op_test with size = 2**29+6 goes from ~30 seconds to ~15 seconds. The fraction of time spent in ConstantFolding::CreateNodeDef() goes down from about 35% to about 12%. PiperOrigin-RevId: 184693749 --- .../grappler/optimizers/constant_folding.cc | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 0aeff6222c..2caefdf4bd 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -808,20 +808,26 @@ NodeDef ConstantFolding::CreateNodeDef(const string& name, // Use the packed representation whenever possible to avoid generating large // graphdefs. Moreover, avoid repeating the last values if they're equal. if (tensor->NumElements() > 4) { -#define POPULATE_TENSOR_PROTO(tensor, t, TYPE, NAME) \ - optimized = true; \ - TYPE last = tensor->flat()(0); \ - int last_index = 0; \ - for (int i = 0; i < tensor->NumElements(); ++i) { \ - TYPE cur = tensor->flat()(i); \ - t->add_##NAME##_val(cur); \ - if (cur != last) { \ - last = cur; \ - last_index = i; \ - } \ - } \ - /* Remove all identical trailing values to save memory. */ \ - t->mutable_##NAME##_val()->Truncate(last_index + 1); +#define POPULATE_TENSOR_PROTO(tensor, t, TYPE, NAME) \ + const TYPE* val_ptr = tensor->flat().data(); \ + TYPE last = *val_ptr; \ + int64 last_index = 0; \ + for (int64 i = 0; i < tensor->NumElements(); ++i) { \ + TYPE cur = *val_ptr++; \ + if (cur != last) { \ + last = cur; \ + last_index = i; \ + } \ + } \ + if (last_index < kint32max) { \ + optimized = true; \ + t->mutable_##NAME##_val()->Reserve(last_index + 1); \ + t->mutable_##NAME##_val()->AddNAlreadyReserved(last_index + 1); \ + val_ptr = tensor->flat().data(); \ + for (int64 i = 0; i <= last_index; ++i) { \ + t->set_##NAME##_val(i, *val_ptr++); \ + } \ + } if (tensor->dtype() == DT_FLOAT) { POPULATE_TENSOR_PROTO(tensor, t, float, float) -- GitLab From c829c71afb14cd41079231b3c3f33fad5e119679 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 6 Feb 2018 17:37:02 -0800 Subject: [PATCH 080/629] [tf.data] Fix a memory leak when an iterator is reinitialized many times in a session. Previously, we would instantiate a new function handle for each function in a dataset each time an iterator on that dataset was initialized. These would only be deleted at session closure, which could lead to an apparent leak of memory over the lifetime of session. PiperOrigin-RevId: 184768730 --- .../core/kernels/data/captured_function.cc | 6 +++++- tensorflow/core/kernels/data/iterator_ops.cc | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc index f3e4f1cd3f..f248f7897f 100644 --- a/tensorflow/core/kernels/data/captured_function.cc +++ b/tensorflow/core/kernels/data/captured_function.cc @@ -32,7 +32,11 @@ Status CapturedFunction::Create( return Status::OK(); } -CapturedFunction::~CapturedFunction() {} +CapturedFunction::~CapturedFunction() { + if (lib_ != nullptr) { + lib_->ReleaseHandle(f_handle_).IgnoreError(); + } +} namespace { class CallFrameBase : public CallFrameInterface { diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index 8a420ac26d..fc3e291afb 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -725,18 +725,23 @@ class OneShotIteratorOp : public AsyncOpKernel { Status TryInit(OpKernelContext* ctx, IteratorResource** iterator, ContainerInfo* cinfo) { TF_RETURN_IF_ERROR(cinfo->Init(ctx->resource_manager(), def())); - FunctionLibraryRuntime* lib = ctx->function_library(); + + FunctionLibraryRuntime* lib; + std::unique_ptr flib_def(nullptr); + std::unique_ptr pflr(nullptr); + TF_RETURN_IF_ERROR(ctx->function_library()->Clone(&flib_def, &pflr, &lib)); // Create an IteratorResource that will hold the iterator for this op. TF_RETURN_IF_ERROR( ctx->resource_manager()->LookupOrCreate( cinfo->container(), cinfo->name(), iterator, - [lib, this](IteratorResource** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { - *ret = new IteratorResource(output_dtypes_, output_shapes_, - graph_def_version_, nullptr, nullptr, - nullptr, lib); - return Status::OK(); - })); + [lib, this, &flib_def, &pflr](IteratorResource** ret) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + *ret = new IteratorResource( + output_dtypes_, output_shapes_, graph_def_version_, + nullptr, std::move(flib_def), std::move(pflr), lib); + return Status::OK(); + })); core::ScopedUnref unref_iterator(*iterator); -- GitLab From 0851b150f3fed192db0f51e0a794d2a667b1bc66 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Tue, 6 Feb 2018 20:40:00 -0800 Subject: [PATCH 081/629] TPUEstimator: Revert the global_step change and require the user to explicitly pass it. PiperOrigin-RevId: 184784330 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 52 ++++--------------- 1 file changed, 11 insertions(+), 41 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 56793f11d9..2aec7ce707 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -485,7 +485,7 @@ class TPUEstimatorSpec( if self.eval_metrics is not None: host_calls['eval_metrics'] = self.eval_metrics if self.host_call is not None: - host_calls['host_call'] = wrap_hostcall_with_global_step(self.host_call) + host_calls['host_call'] = self.host_call host_call_ret = _OutfeedHostCall.create_cpu_hostcall(host_calls) eval_metric_ops = None if self.eval_metrics is not None: @@ -1303,19 +1303,21 @@ class _ModelFnWrapper(object): self._call_model_fn(features, labels)) loss, train_op = estimator_spec.loss, estimator_spec.train_op - host_call_outfeed_ops = [] if isinstance(estimator_spec, TPUEstimatorSpec): captured_scaffold_fn.capture(estimator_spec.scaffold_fn) - if estimator_spec.host_call is not None: - host_call.record({ - 'host_call': wrap_hostcall_with_global_step( - estimator_spec.host_call)}) - host_call_outfeed_ops = host_call.create_enqueue_op() else: captured_scaffold_fn.capture(None) - with ops.control_dependencies([train_op] + host_call_outfeed_ops): - return array_ops.identity(loss) + # We must run train_op to update the variables prior to running the + # outfeed. + with ops.control_dependencies([train_op]): + host_call_outfeed_ops = [] + if (isinstance(estimator_spec, TPUEstimatorSpec) and + estimator_spec.host_call is not None): + host_call.record({'host_call': estimator_spec.host_call}) + host_call_outfeed_ops = host_call.create_enqueue_op() + with ops.control_dependencies(host_call_outfeed_ops): + return array_ops.identity(loss) return train_step, host_call, captured_scaffold_fn @@ -1657,38 +1659,6 @@ class _OutfeedHostCall(object): return ret -def wrap_hostcall_with_global_step(hostcall): - """Wrap the hostcall so that we update the global step upon every call.""" - if hostcall is None: - return None - host_fn, tensors = hostcall - - def global_step_host_fn(_global_step, *args, **kwargs): # pylint: disable=invalid-name - # Note that we don't have any ordering here, so the graph may see a - # global_step that's off by 1. - state_ops.assign( - training.get_global_step(), - math_ops.cast(_global_step[0], dtypes.int64)) - return host_fn(*args, **kwargs) - # Give the global step tensor a batch dimension. Reshape is not supported for - # int64, so we cast it to int32. - # TODO(jhseu): Remove the cast once int64 is supported. - global_step_tensor = array_ops.reshape( - math_ops.cast(training.get_global_step(), dtypes.int32), [1]) - if isinstance(tensors, dict): - outfeed_tensors = {'_global_step': global_step_tensor} - outfeed_tensors.update(tensors) - return global_step_host_fn, outfeed_tensors - else: - fn_args = util.fn_args(host_fn) - if len(tensors) != len(fn_args): - raise RuntimeError( - 'In TPUEstimatorSpec.host_call, length of tensors {} does not match ' - 'method args of the function, which takes {}.'.format( - len(tensors), len(fn_args))) - return global_step_host_fn, [global_step_tensor] + list(tensors) - - class _OutfeedHostCallHook(session_run_hook.SessionRunHook): """Hook to run host calls when use_tpu=False.""" -- GitLab From 7c799e11b7b765a8d43e409b32d24dfbb5614bf8 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 7 Feb 2018 20:50:09 -0800 Subject: [PATCH 082/629] Move TPU doc to "using_tpu". Add short titles to some docs. PiperOrigin-RevId: 184941101 --- tensorflow/docs_src/install/leftnav_files | 14 +++++++------- .../docs_src/programmers_guide/debugger.md | 2 +- .../docs_src/programmers_guide/leftnav_files | 9 ++++++--- .../using_tpu.md} | 0 tensorflow/docs_src/tutorials/leftnav_files | 16 ++++++++-------- 5 files changed, 22 insertions(+), 19 deletions(-) rename tensorflow/docs_src/{api_guides/python/TPUEstimator.md => programmers_guide/using_tpu.md} (100%) diff --git a/tensorflow/docs_src/install/leftnav_files b/tensorflow/docs_src/install/leftnav_files index 0e8b5ae7a1..e523e06f67 100644 --- a/tensorflow/docs_src/install/leftnav_files +++ b/tensorflow/docs_src/install/leftnav_files @@ -1,16 +1,16 @@ index.md ### Python -install_linux.md -install_mac.md -install_windows.md -install_sources.md +install_linux.md: Ubuntu +install_mac.md: MacOS +install_windows.md: Windows +install_sources.md: From source >>> migration.md ### Other Languages -install_java.md -install_go.md -install_c.md +install_java.md: Java +install_go.md: Go +install_c.md: C diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 9eaee27028..dbc4517087 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -1,4 +1,4 @@ -# Debugging TensorFlow Programs +# TensorFlow Debugger diff --git a/tensorflow/docs_src/programmers_guide/leftnav_files b/tensorflow/docs_src/programmers_guide/leftnav_files index 38de3ccc3e..3fe4cb2dda 100644 --- a/tensorflow/docs_src/programmers_guide/leftnav_files +++ b/tensorflow/docs_src/programmers_guide/leftnav_files @@ -10,7 +10,10 @@ tensors.md variables.md graphs.md saved_model.md + +### Accelerators using_gpu.md +using_tpu.md ### ML Concepts embedding.md @@ -19,9 +22,9 @@ embedding.md debugger.md ### TensorBoard -summaries_and_tensorboard.md -graph_viz.md -tensorboard_histograms.md +summaries_and_tensorboard.md: Visualizing Learning +graph_viz.md: Graphs +tensorboard_histograms.md: Histograms ### Misc version_compat.md diff --git a/tensorflow/docs_src/api_guides/python/TPUEstimator.md b/tensorflow/docs_src/programmers_guide/using_tpu.md similarity index 100% rename from tensorflow/docs_src/api_guides/python/TPUEstimator.md rename to tensorflow/docs_src/programmers_guide/using_tpu.md diff --git a/tensorflow/docs_src/tutorials/leftnav_files b/tensorflow/docs_src/tutorials/leftnav_files index 41ffdc8601..888052428f 100644 --- a/tensorflow/docs_src/tutorials/leftnav_files +++ b/tensorflow/docs_src/tutorials/leftnav_files @@ -1,22 +1,22 @@ index.md ### Images -layers.md -image_recognition.md -image_retraining.md +layers.md: MNIST +image_recognition.md: Image Recognition +image_retraining.md: Image Retraining deep_cnn.md ### Sequences recurrent.md -seq2seq.md -recurrent_quickdraw.md +seq2seq.md: Neural Machine Translation +recurrent_quickdraw.md: Drawing Classification audio_recognition.md ### Data Representation -wide.md -wide_and_deep.md +wide.md: Linear Models +wide_and_deep.md: Wide & Deep Learning word2vec.md -kernel_methods.md +kernel_methods.md: Kernel Methods ### Non-ML mandelbrot.md -- GitLab From 7f982862c68febf8c4e9553a5b848ecd570cb808 Mon Sep 17 00:00:00 2001 From: cclauss Date: Thu, 8 Feb 2018 01:28:06 +0100 Subject: [PATCH 083/629] Fix undefined name: import as_str_any for line 35 (#16668) flake8 testing of https://github.com/tensorflow/tensorflow on Python 2.7.14 $ __flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics__ ``` ./tensorflow/python/util/compat_internal.py:33:12: F821 undefined name 'as_str_any' path = as_str_any(path.__fspath__()) ^ ``` --- tensorflow/python/util/compat_internal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/util/compat_internal.py b/tensorflow/python/util/compat_internal.py index a299b2fc3c..9e60e689d2 100644 --- a/tensorflow/python/util/compat_internal.py +++ b/tensorflow/python/util/compat_internal.py @@ -19,6 +19,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.util.compat import as_str_any + def path_to_str(path): """Returns the file system path representation of a `PathLike` object, else as it is. -- GitLab From 43ecf848478940904e1a2df10df6bfe72163a38d Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Mon, 12 Feb 2018 13:00:11 -0800 Subject: [PATCH 084/629] Revert "Add checkpoint file prefix check (#14341)" This reverts commit 2a16133061ba3f8fa60c0338cd629f2211f9b17d. --- .../contrib/slim/python/slim/evaluation_test.py | 2 +- tensorflow/python/training/saver.py | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index f5a9299d26..870f504d10 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -236,7 +236,7 @@ class SingleEvaluationTest(test.TestCase): def _prepareCheckpoint(self, checkpoint_path): init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) - saver = saver_lib.Saver() + saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V1) with self.test_session() as sess: sess.run(init_op) saver.save(sess, checkpoint_path) diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 764f840012..3888e9bba4 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -1597,9 +1597,9 @@ class Saver(object): [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes). Returns: - A string: path prefix used for the checkpoint files. If checkpoint - format is V1 and the saver is sharded, this string ends with: - '-?????-of-nnnnn' where 'nnnnn' is the number of shards created. + A string: path prefix used for the checkpoint files. If the saver is + sharded, this string ends with: '-?????-of-nnnnn' where 'nnnnn' + is the number of shards created. If the saver is empty, returns None. Raises: @@ -1749,11 +1749,6 @@ class Saver(object): return if save_path is None: raise ValueError("Can't load save_path when it is None.") - if (os.path.isfile(save_path) and - self._write_version != saver_pb2.SaverDef.V1): - raise ValueError("The specified path: %s is a file." - " Please specify only the path prefix" - " to the checkpoint files." % save_path) logging.info("Restoring parameters from %s", save_path) if context.in_graph_mode(): sess.run(self.saver_def.restore_op_name, -- GitLab From 54ba7aace17cc25d4d063e174ad3a15db5447085 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 12 Feb 2018 13:00:57 -0800 Subject: [PATCH 085/629] Scope and decorator to automatically add control dependencies. Should mimic the desired behavior of eager code. For now supports only straight-line code and conditionals. PiperOrigin-RevId: 185421760 --- tensorflow/python/eager/function.py | 206 +++++++++++++++++++++++ tensorflow/python/eager/function_test.py | 168 ++++++++++++++++++ 2 files changed, 374 insertions(+) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 767d719ea6..d352d67523 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -36,6 +36,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_module from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.util import compat from tensorflow.python.util import nest @@ -813,3 +814,208 @@ def make_defun_op(func, *args, **kwds): if any(isinstance(x, ops.EagerTensor) for x in kwds.values()): raise ValueError("Tensor keyword arguments are not supported.") return _defun_internal(name, func, args, kwds) + + +class AutomaticControlDependencies(object): + """Context manager to automatically add control dependencies. + + Code under this context manager will act as if a sensible set of control + dependencies were present. More specifically: + 1. All stateful ops in the scope will execute + 2. Stateful ops which modify the same resource will execute in program order + + Note: creating variables in an automatic control dependencies context is not + supported (the value of the variables will never change as they will keep + getting reinitialized). + + NOT THREAD SAFE + """ + + def __init__(self): + self._returned_tensors = set() + + def mark_as_return(self, tensor): + self._returned_tensors.add(tensor) + + def __enter__(self): + if context.in_eager_mode(): + return self + # This code assumes no other thread is adding ops to the graph while + # we're adding ops to the graph. + # TODO(apassos): Fix this by locking the graph or using a temporary + # graph (but that would mess up devices and collections at least, + # probably other things as well). + self._graph = ops.get_default_graph() + self._n_operations = len(self._graph.get_operations()) + return self + + def _process_switch(self, switch_op, ops_which_must_run, + last_op_using_resource_tensor, merge_for_resource): + """Processes a switch node for a resource input. + + When tensorflow creates a cond, it creates a control flow context for each + branch of the cond. Each external tensor accessed by that branch is routed + through a switch op, which gets created in the graph _after_ the op which + uses that tensor get created. + + If the resource comes from another switch op we process that one first. + + _process_switch creates a corresponding merge node for the switch node. This + merge node is added to the outer control flow context of the switch + node. We also ensure that: + + 1. The switch node executes after the previous op which used the resource + tensor + + 2. Any op which uses a resource output of the switch node executes before + the merge for the switch node. + + 3. The next op which uses the input resource to the switch node (which + might be another switch node for the other branch of the conditional) + will execute after the merge node is done. + + 4. The merge node is marked as must_run so it will run even if no + subsequent operation uses the resource. + + Args: + switch_op: the switch op to be processed + ops_which_must_run: the set of ops which must run + last_op_using_resource_tensor: map from resource tensor to last op using + it + merge_for_resource: map from resource tensor to merge which must follow + all usages of it. + """ + inp = switch_op.inputs[0] + if inp.dtype == dtypes_module.resource and inp.op.type == "Switch": + self._process_switch(inp.op, ops_which_must_run, + last_op_using_resource_tensor, merge_for_resource) + if switch_op.outputs[0] in merge_for_resource: + return + new_merge = control_flow_ops.merge(switch_op.outputs, + name="artificial_merge") + new_merge[0].op._control_flow_context = ( # pylint: disable=protected-access + switch_op._control_flow_context.outer_context) # pylint: disable=protected-access + # Ensures the merge always runs + ops_which_must_run.add(new_merge[0].op) + if inp in last_op_using_resource_tensor: + # Ensures the switch exectutes after the previous op using the resource. + switch_op._add_control_input(last_op_using_resource_tensor[inp]) # pylint: disable=protected-access + # Ensure the next op outside the cond happens after the merge. + last_op_using_resource_tensor[inp] = new_merge[0].op + if inp in merge_for_resource: + merge_for_resource[inp]._add_control_input(new_merge[0].op) # pylint: disable=protected-access + for o in switch_op.outputs: + # Ensures the merge will execute after all ops inside the cond + merge_for_resource[o] = new_merge[0].op + + def __exit__(self, unused_type, unused_value, unused_traceback): + if context.in_eager_mode(): + return + + if self._graph is not ops.get_default_graph(): + raise RuntimeError( + "Graph changed while trying to add control dependencies.") + + # map from resource tensor to the last op which used it + last_op_using_resource_tensor = {} + # set of conditional and loop exits + ops_which_must_run = set() + # merge which must depend on ops which use this resource + merge_for_resource = {} + + new_operations = self._graph.get_operations()[self._n_operations:] + + # Ensures that uses of resource tensors get serialized properly and all + # execute. This is done by keeping a map from resource tensor to the last op + # in graph-construction order which used it (last_op_using_resource_tensor). + # + # Conditionals are written in TensorFlow such that every external tensor + # accessed in the conditional goes through a switch op and every return + # tensor (it's guaranteed that there will be at least one) goes through a + # merge op. + # + # To handle conditionals, switches are handled in a special way (see + # comments for _process_switch). Merge nodes created by TF's conditional + # logic (as opposed to by _process_switch) are forced to run and also get a + # control dependency added to them to ensure all stateful ops inside their + # control flow context run. + # + # We also ensure that if an op is using a resource output by a switch node + # (that is, a resource tensor for which there's a value in + # merge_for_resource) this op will run before the merge for that resource. + # + # We try to add control inputs to nodes respecting their control flow + # contexts to avoid dead nodes propagating everywhere and leading to + # "retval[0] doesn't have value" errors. If a node gets a control dependency + # on a dead node (i.e. a note from an untaken control flow branch) that node + # will be marked as dead unless it's a merge node. + # + # TODO(apassos): serialize non-resource-taking stateful ops as well, and + # test that it works. Support while loops. Support init_scope escaping from + # this. + for op in new_operations: + control_inputs = set() + # Ensure stateful ops run + if self._graph._registered_ops[op.type].is_stateful: # pylint: disable=protected-access + ops_which_must_run.add(op) + # Ignore switches (they're handled separately) + if op.type == "Switch" and op.inputs[0].dtype == dtypes_module.resource: + continue + # Make merges trigger all other computation which must run + if op.type == "Merge": + for o in ops_which_must_run: + op._add_control_input(o) # pylint: disable=protected-access + for inp in o.inputs: + if inp in last_op_using_resource_tensor: + last_op_using_resource_tensor[inp] = op + ops_which_must_run = set([op]) + continue + for inp in op.inputs: + if inp.dtype == dtypes_module.resource: + # Deal with switches, finally. + if inp.op.type == "Switch": + self._process_switch(inp.op, ops_which_must_run, + last_op_using_resource_tensor, + merge_for_resource) + # Ensure uses of resources are serialized + if inp in last_op_using_resource_tensor: + if (last_op_using_resource_tensor[inp]._control_flow_context # pylint: disable=protected-access + is op._control_flow_context): # pylint: disable=protected-access + control_inputs.add(last_op_using_resource_tensor[inp]) + # Ensure merges happen after the closing of a cond block + if inp in merge_for_resource: + merge_for_resource[inp]._add_control_input(op) # pylint: disable=protected-access + last_op_using_resource_tensor[inp] = op + control_inputs = [c for c in control_inputs + if c._control_flow_context is op._control_flow_context] # pylint: disable=protected-access + op._add_control_inputs(control_inputs) # pylint: disable=protected-access + + # Ensure all ops which must run do run + for r in self._returned_tensors: + r.op._add_control_inputs( # pylint: disable=protected-access + [o for o in ops_which_must_run + if o._control_flow_context is r.op._control_flow_context]) # pylint: disable=protected-access + + +def automatic_control_dependencies(f): + """Wraps f to automatically insert control dependencies. + + The inserted dependencies ensure that: + 1. All stateful ops in f run when the result of f runs + 2. Updates to the same resources happen in order. + + Args: + f: the function to be wrapped. + + Returns: + The wrapped function. + """ + + def wrapper(*args, **kwds): + with AutomaticControlDependencies() as a: + result = f(*args, **kwds) + for t in nest.flatten(result): + a.mark_as_return(t) + return result + + return tf_decorator.make_decorator(f, wrapper) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 3e8e67ac7e..431d9388c0 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope @@ -595,5 +596,172 @@ class FunctionTest(test.TestCase): create_variable() +class AutomaticControlDependenciesTest(test.TestCase): + + def testBasic(self): + with context.graph_mode(), self.test_session(): + v = resource_variable_ops.ResourceVariable(1.0) + variables.global_variables_initializer().run() + with function.AutomaticControlDependencies() as c: + v.assign(v + 1) + v.assign(2 * v) + val = v.read_value() + c.mark_as_return(val) + self.assertAllEqual(val.eval(), 4.0) + + def testCondMustRun(self): + with context.graph_mode(), self.test_session(): + v = resource_variable_ops.ResourceVariable(1.0) + variables.global_variables_initializer().run() + p = array_ops.placeholder(dtype=dtypes.bool) + with function.AutomaticControlDependencies() as c: + + def true_fn(): + v.assign(v + 1) + return 0.0 + + def false_fn(): + v.assign(v + 4) + return 1.0 + + control_flow_ops.cond(p, true_fn, false_fn) + val = v.read_value() + c.mark_as_return(val) + self.assertAllEqual(val.eval(feed_dict={p: False}), 5.0) + self.assertAllEqual(val.eval(feed_dict={p: True}), 6.0) + + def testCondMustRunSeparateRead(self): + with context.graph_mode(), self.test_session(): + v = resource_variable_ops.ResourceVariable(1.0) + variables.global_variables_initializer().run() + p = array_ops.placeholder(dtype=dtypes.bool) + with function.AutomaticControlDependencies() as c: + + def true_fn(): + v.assign(v + 1) + return 0.0 + + def false_fn(): + v.assign(v + 4) + return 1.0 + + control_flow_ops.cond(p, true_fn, false_fn) + one = constant_op.constant(1.0) + c.mark_as_return(one) + one.eval(feed_dict={p: False}) + self.assertAllEqual(v.read_value().eval(), 5.0) + one.eval(feed_dict={p: True}) + self.assertAllEqual(v.read_value().eval(), 6.0) + + def testCondNested(self): + with context.graph_mode(), self.test_session(): + v = resource_variable_ops.ResourceVariable(1.0) + variables.global_variables_initializer().run() + p = array_ops.placeholder(dtype=dtypes.bool) + q = array_ops.placeholder(dtype=dtypes.bool) + with function.AutomaticControlDependencies() as c: + + def true_fn(): + v.assign(v + 1, name='true') + return 1.0 + + def false_fn(): + + def inner_true_fn(): + v.assign(v * 2, name='false_true') + return 2.0 + + def inner_false_fn(): + v.assign(v * 3, name='false_false') + return 3.0 + + control_flow_ops.cond(q, inner_true_fn, inner_false_fn) + return 1.0 + + control_flow_ops.cond(p, true_fn, false_fn) + with ops.name_scope('final'): + val = v.read_value() + c.mark_as_return(val) + self.assertAllEqual(val.eval(feed_dict={p: False, q: False}), 3.0) + self.assertAllEqual(val.eval(feed_dict={p: False, q: True}), 6.0) + self.assertAllEqual(val.eval(feed_dict={p: True, q: True}), 7.0) + self.assertAllEqual(val.eval(feed_dict={p: True, q: False}), 8.0) + + def testCondOneBranch(self): + with context.graph_mode(), self.test_session(): + v = resource_variable_ops.ResourceVariable(1.0) + variables.global_variables_initializer().run() + p = array_ops.placeholder(dtype=dtypes.bool) + with function.AutomaticControlDependencies() as c: + + def true_fn(): + return 0.0 + + def false_fn(): + v.assign(v + 4) + return 1.0 + + control_flow_ops.cond(p, true_fn, false_fn) + val = v.read_value() + c.mark_as_return(val) + self.assertAllEqual(val.eval(feed_dict={p: False}), 5.0) + self.assertAllEqual(val.eval(feed_dict={p: True}), 5.0) + + def testCondOneBranchUpdateBefore(self): + with context.graph_mode(), self.test_session(): + v = resource_variable_ops.ResourceVariable(1.0) + variables.global_variables_initializer().run() + p = array_ops.placeholder(dtype=dtypes.bool) + with function.AutomaticControlDependencies() as c: + v.assign(v * 2) + + def true_fn(): + return 0.0 + + def false_fn(): + v.assign(v + 4) + return 1.0 + + control_flow_ops.cond(p, true_fn, false_fn) + val = v.read_value() + c.mark_as_return(val) + self.assertAllEqual(val.eval(feed_dict={p: False}), 6.0) + self.assertAllEqual(val.eval(feed_dict={p: True}), 12.0) + + def testCondOneBranchUpdateAfter(self): + with context.graph_mode(), self.test_session(): + v = resource_variable_ops.ResourceVariable(1.0) + variables.global_variables_initializer().run() + p = array_ops.placeholder(dtype=dtypes.bool) + with function.AutomaticControlDependencies() as c: + + def true_fn(): + return 0.0 + + def false_fn(): + v.assign(v + 4) + return 1.0 + + control_flow_ops.cond(p, true_fn, false_fn) + v.assign(v * 2) + val = v.read_value() + c.mark_as_return(val) + self.assertAllEqual(val.eval(feed_dict={p: False}), 10.0) + self.assertAllEqual(val.eval(feed_dict={p: True}), 20.0) + + def testDecorator(self): + with context.graph_mode(), self.test_session(): + v = resource_variable_ops.ResourceVariable(1.0) + variables.global_variables_initializer().run() + + @function.automatic_control_dependencies + def f(): + v.assign(v + 1) + v.assign(2 * v) + return v.read_value() + + self.assertAllEqual(f().eval(), 4.0) + + if __name__ == '__main__': test.main() -- GitLab From 8abf81de646425e910a88c385975c95f0756666b Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Mon, 12 Feb 2018 13:11:17 -0800 Subject: [PATCH 086/629] Respect the cluster spec prop during TPU system auto query. PiperOrigin-RevId: 185423314 --- .../contrib/tpu/python/tpu/tpu_context.py | 4 +++- .../tpu/python/tpu/tpu_system_metadata.py | 23 +++++++++++++------ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py index 8c65018d14..d735618260 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py @@ -106,7 +106,9 @@ class _TPUContext(object): # pylint: disable=protected-access tpu_system_metadata = ( tpu_system_metadata_lib._query_tpu_system_metadata( - master, query_topology=self.model_parallelism_enabled)) + master, + run_config=self._config, + query_topology=self.model_parallelism_enabled)) self._lazy_tpu_system_metadata_dict[master] = tpu_system_metadata return tpu_system_metadata diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py index e003313667..5dcbda714b 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py @@ -45,7 +45,8 @@ _TPUSystemMetadata = collections.namedtuple('_TPUSystemMetadata', [ ]) -def _query_tpu_system_metadata(master_address, query_topology=False): +def _query_tpu_system_metadata(master_address, run_config, + query_topology=False): """Automatically detects the TPU system metadata in the system.""" tpu_core_count = 0 devices = [] @@ -59,8 +60,8 @@ def _query_tpu_system_metadata(master_address, query_topology=False): with ops.Graph().as_default(): with session_lib.Session( master_address, - config=config_pb2.ConfigProto( - operation_timeout_in_ms=_PINGING_MASTER_TIMEOUT_IN_MS)) as sess: + config=_get_session_config_with_timeout( + _PINGING_MASTER_TIMEOUT_IN_MS, run_config)) as sess: devices = sess.list_devices() for device in devices: match = _TPU_DEVICE_REG.match(device.name) @@ -104,7 +105,7 @@ def _query_tpu_system_metadata(master_address, query_topology=False): 'TPU worker has some problems. Available devices: {}'.format( master_address, devices)) - topology = _obtain_topology(master_address) + topology = _obtain_topology(master_address, run_config) metadata = _TPUSystemMetadata( num_cores=tpu_core_count, @@ -118,14 +119,14 @@ def _query_tpu_system_metadata(master_address, query_topology=False): return metadata -def _obtain_topology(master_address): +def _obtain_topology(master_address, run_config): try: logging.info('Initializing TPU system (master: %s) to fetch topology ' 'for model parallelism. This might take a while.', master_address) with ops.Graph().as_default(): - session_config = config_pb2.ConfigProto( - operation_timeout_in_ms=_INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS) + session_config = _get_session_config_with_timeout( + _INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS, run_config) with session_lib.Session( master_address, config=session_config) as sess: topology = sess.run(tpu.initialize_system()) @@ -137,3 +138,11 @@ def _obtain_topology(master_address): master_address)) +def _get_session_config_with_timeout(timeout_in_secs, run_config): + cluster_def = None + if run_config.session_config and run_config.session_config.cluster_def: + cluster_def = run_config.session_config.cluster_def + + config = config_pb2.ConfigProto( + operation_timeout_in_ms=timeout_in_secs, cluster_def=cluster_def) + return config -- GitLab From ffc82833fff68cb16b2ffc1a0fe6077016a494e7 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 12 Feb 2018 13:25:57 -0800 Subject: [PATCH 087/629] Extend the memory optimizations to also support accumulate_n ops PiperOrigin-RevId: 185425999 --- tensorflow/core/grappler/optimizers/memory_optimizer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index ef178adc14..777cc3a79b 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -490,12 +490,12 @@ void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, } bool SchedulingPass(Cluster* cluster, GrapplerItem* item) { - // Look for AddN nodes and record input names. + // Look for AddN nodes (and equivalent) and record input names. GraphView view(&item->graph); std::unordered_map> addn_list; for (NodeDef& node : *item->graph.mutable_node()) { - if (!IsAddN(node)) { + if (!IsAddN(node) && node.op() != "AccumulateNV2") { continue; } // There is nothing to gain by optimizing nodes with 2 or fewer inputs. -- GitLab From 5bdb36e7e1f4add33a6d7f0287b5a3a0492c356f Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 12 Feb 2018 13:26:35 -0800 Subject: [PATCH 088/629] allow @{} links to break across lines. PiperOrigin-RevId: 185426070 --- .../docs_src/get_started/get_started_for_beginners.md | 2 +- tensorflow/docs_src/install/install_sources.md | 2 +- tensorflow/docs_src/install/install_windows.md | 2 +- tensorflow/tools/docs/parser.py | 4 ++-- tensorflow/tools/docs/parser_test.py | 7 ++++--- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tensorflow/docs_src/get_started/get_started_for_beginners.md b/tensorflow/docs_src/get_started/get_started_for_beginners.md index cf514ceaf0..fb235735bc 100644 --- a/tensorflow/docs_src/get_started/get_started_for_beginners.md +++ b/tensorflow/docs_src/get_started/get_started_for_beginners.md @@ -331,7 +331,7 @@ interpret data is such a rich topic that we devote an entire From a code perspective, you build a list of `feature_column` objects by calling functions from the @{tf.feature_column} module. Each object describes an input to the model. To tell the model to interpret data as a floating-point value, -call @{tf.feature_column.numeric_column). In `premade_estimator.py`, all +call @{tf.feature_column.numeric_column}. In `premade_estimator.py`, all four features should be interpreted as literal floating-point values, so the code to create a feature column looks as follows: diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index bc7d2080dc..175b5c4402 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -393,7 +393,7 @@ TensorFlow programs:

    Hello, TensorFlow!
    -If you are new to TensorFlow, see @{$get_started/get_started$Getting Started with +If you are new to TensorFlow, see @{$get_started$Getting Started with TensorFlow}. If the system outputs an error message instead of a greeting, see [Common diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index 86a111c2ec..6cc5b92305 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -153,7 +153,7 @@ TensorFlow programs:
    Hello, TensorFlow!
    -If you are new to TensorFlow, see @{$get_started/get_started$Getting Started with +If you are new to TensorFlow, see @{$get_started$Getting Started with TensorFlow}. If the system outputs an error message instead of a greeting, see [Common diff --git a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 3db164c2b5..e758229535 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -111,8 +111,8 @@ SYMBOL_REFERENCE_RE = re.compile( r""" # Start with a literal "@{". @\{ - # Group at least 1 symbol: not "}" or "\n". - ([^}\n]+) + # Group at least 1 symbol, not "}". + ([^}]+) # Followed by a closing "}" \} """, diff --git a/tensorflow/tools/docs/parser_test.py b/tensorflow/tools/docs/parser_test.py index 8a0e9af521..fca5436ca5 100644 --- a/tensorflow/tools/docs/parser_test.py +++ b/tensorflow/tools/docs/parser_test.py @@ -76,8 +76,9 @@ class ParserTest(googletest.TestCase): pass string = ( - 'A @{tf.reference}, another @{tf.reference}, a member ' - '@{tf.reference.foo}, and a @{tf.third$link `text` with `code` in it}.') + 'A @{tf.reference}, another @{tf.reference$with\nnewline}, a member ' + '@{tf.reference.foo}, and a @{tf.third$link `text` with `code` in ' + 'it}.') duplicate_of = {'tf.third': 'tf.fourth'} index = {'tf.reference': HasOneMember, 'tf.reference.foo': HasOneMember.foo, @@ -93,7 +94,7 @@ class ParserTest(googletest.TestCase): self.assertEqual('A ' 'tf.reference, ' 'another ' - 'tf.reference, ' + 'with\nnewline, ' 'a member ' 'tf.reference.foo, ' 'and a link ' -- GitLab From 0981949cb426c1cf8c6c8c027144d99823a73abd Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 12 Feb 2018 13:37:37 -0800 Subject: [PATCH 089/629] Filter out the fake XLA devices to avoid double counting the actual hardware resources available on the machine PiperOrigin-RevId: 185427665 --- tensorflow/core/grappler/clusters/single_machine.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index 862ce4ae88..cc7f418d49 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/notification.h" +#include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/session.h" namespace tensorflow { @@ -86,7 +87,9 @@ Status SingleMachine::Provision() { attr = GetLocalCPUInfo(); } else if (dev.device_type() == "GPU") { attr = GetLocalGPUInfo(gpu_id++); - } else { + } else if (dev.device_type().find("XLA") == string::npos) { + // Filter out the fake XLA devices to avoid double counting the actual + // hardware resources that are available. attr.set_type(dev.device_type()); } // Overwrite the memory size since users might have requested to use only a -- GitLab From 144b2edac4c103e8eba1922de3ba3b5f14d17803 Mon Sep 17 00:00:00 2001 From: seaotterman Date: Mon, 12 Feb 2018 23:03:58 +0100 Subject: [PATCH 090/629] Update data_flow_ops.py Adresses #16948 --- tensorflow/python/ops/data_flow_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index 95e45bff06..03ed537cfc 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -474,7 +474,7 @@ class QueueBase(object): name: A name for the operation (optional). Returns: - The tuple of concatenated tensors that was dequeued. + The list of concatenated tensors that was dequeued. """ if name is None: name = "%s_DequeueMany" % self._name -- GitLab From b47dcaf853f56f2709db06eb5aec83c545a5c87e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Feb 2018 14:01:51 -0800 Subject: [PATCH 091/629] Return false instead of crashing in Tensor::SharesBufferWith if neither tensor has a buffer assigned yet, since that is a valid state. Returning buf_ != nullptr && b.buf_ != nullptr && buf_->root_buffer() == b.buf_->root_buffer(); still satisfies the contract in the header, i.e. "True iff the two tensors use the same underlying refcounted storage." PiperOrigin-RevId: 185431574 --- tensorflow/core/framework/tensor.cc | 5 ++--- tensorflow/core/framework/tensor_test.cc | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 0645ec4282..5d32b71628 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -1025,9 +1025,8 @@ StringPiece Tensor::tensor_data() const { } bool Tensor::SharesBufferWith(const Tensor& b) const { - CHECK_NE(nullptr, buf_); - CHECK_NE(nullptr, b.buf_); - return buf_->root_buffer() == b.buf_->root_buffer(); + return buf_ != nullptr && b.buf_ != nullptr && + buf_->root_buffer() == b.buf_->root_buffer(); } string Tensor::DebugString() const { diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc index 81644388ab..b613effd18 100644 --- a/tensorflow/core/framework/tensor_test.cc +++ b/tensorflow/core/framework/tensor_test.cc @@ -1085,6 +1085,21 @@ class DummyCPUAllocator : public Allocator { void DeallocateRaw(void* ptr) override {} }; +TEST(Tensor, SharesBufferWith) { + Tensor a_empty; + Tensor b_empty; + Tensor a(DT_FLOAT, TensorShape({1})); + Tensor b(DT_FLOAT, TensorShape({1})); + Tensor copy(a); + EXPECT_FALSE(a_empty.SharesBufferWith(a_empty)); + EXPECT_FALSE(a_empty.SharesBufferWith(b_empty)); + EXPECT_FALSE(a_empty.SharesBufferWith(a)); + EXPECT_FALSE(a_empty.SharesBufferWith(copy)); + EXPECT_TRUE(a.SharesBufferWith(a)); + EXPECT_FALSE(a.SharesBufferWith(b)); + EXPECT_TRUE(a.SharesBufferWith(copy)); +} + TEST(Tensor, FailureToAllocate) { TensorShape shape({1}); DummyCPUAllocator allocator; -- GitLab From ef59be7e91a2b61c73b71086a43cfc7d96374e99 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 12 Feb 2018 14:05:08 -0800 Subject: [PATCH 092/629] Add tests for visible api arguments in quantize_graph. PiperOrigin-RevId: 185432142 --- .../quantize/python/quantize_graph_test.py | 140 ++++++++++++------ 1 file changed, 96 insertions(+), 44 deletions(-) diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py index c57fcd4e4e..5c65a160a0 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph_test.py +++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py @@ -28,13 +28,11 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.platform import googletest -# TODO(suharshs): Add tests for testing experimental APIs and additional -# input arguments class QuantizeGraphTest(test_util.TensorFlowTestCase): # We have a lot of other tests that test the details of the rewrite, here we # just the specific features of the quantize_graph API. - def _RunTestOverParameters(self, test_fn): + def _RunTestOverAllRewrites(self, test_fn): rewrite_fns = [ quantize_graph.create_training_graph, quantize_graph.create_eval_graph, @@ -44,71 +42,125 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): for fn in rewrite_fns: test_fn(fn) + def _RunTestOverTrainingRewrites(self, test_fn): + rewrite_fns = [ + quantize_graph.create_training_graph, + quantize_graph.experimental_create_training_graph, + ] + for fn in rewrite_fns: + test_fn(fn) + + def _RunTestOverExperimentalRewrites(self, test_fn): + rewrite_fns = [ + quantize_graph.experimental_create_training_graph, + quantize_graph.experimental_create_eval_graph, + ] + for fn in rewrite_fns: + test_fn(fn) + def testRewrite(self): - self._RunTestOverParameters(self._TestRewrite) + self._RunTestOverAllRewrites(self._TestRewrite) - def _TestRewrite(self, fn): + def _TestRewrite(self, rewrite_fn): graph = ops.Graph() with graph.as_default(): - batch_size, height, width, depth = 5, 128, 128, 3 - inputs = array_ops.zeros((batch_size, height, width, depth)) - conv = layers.conv2d( - inputs, - 32, [5, 5], - stride=2, - padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=None, - scope='test') - _ = nn_ops.relu6(conv) + self._ConvLayer() orig_variable_names = set( [v.name for v in graph.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - fn(graph) + rewrite_fn(graph) q_variables = graph.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) # Ensure that variables were added. self.assertTrue(len(orig_variable_names) < len(q_variables)) def testDefaultGraph(self): - self._RunTestOverParameters(self._TestRewrite) + self._RunTestOverAllRewrites(self._TestRewrite) - def _TestDefaultGraph(self, fn): + def _TestDefaultGraph(self, rewrite_fn): + # Tests that the default graph is correctly used when no args are provided + # to rewrite_fn. with ops.Graph().as_default() as g: - batch_size, height, width, depth = 5, 128, 128, 3 - inputs = array_ops.zeros((batch_size, height, width, depth)) - conv = layers.conv2d( - inputs, - 32, [5, 5], - stride=2, - padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=None, - scope='test') - _ = nn_ops.relu6(conv) - + self._ConvLayer() orig_variable_names = set( [v.name for v in g.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - - fn() + rewrite_fn() q_variables = g.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) # Ensure that variables were added. self.assertTrue(len(orig_variable_names) < len(q_variables)) - def _WeightInit(self, stddev): - """Returns truncated normal variable initializer. + def testQuantDelay(self): + self._RunTestOverTrainingRewrites(self._TestQuantDelay) - Function is defined purely to shorten the name so that it stops wrapping. - - Args: - stddev: Standard deviation of normal variable. - - Returns: - An initialized that initialzes with a truncated normal variable. - """ - return init_ops.truncated_normal_initializer(stddev=stddev) + def _TestQuantDelay(self, rewrite_fn): + with ops.Graph().as_default() as g: + self._ConvLayer() + quant_delay = 100 + rewrite_fn(quant_delay=quant_delay) + + quant_delay_found = False + for op in g.get_operations(): + # Check to see if the quant_delay is correctly set. + if 'activate_quant' in op.name and op.type == 'Const': + quant_delay_found = True + const_value = str(op.get_attr('value')) + self.assertTrue(('int64_val: %i' % quant_delay) in const_value) + self.assertTrue(quant_delay_found) + + def testWeightBits(self): + self._RunTestOverExperimentalRewrites(self._TestWeightBits) + + def _TestWeightBits(self, rewrite_fn): + with ops.Graph().as_default() as g: + self._ConvLayer() + weight_bits = 4 + rewrite_fn(weight_bits=weight_bits) + + weights_quant_found = False + for op in g.get_operations(): + # Check to see if FakeQuant operations for weights have the right bits + # set. + if 'weights_quant' in op.name and op.type == 'FakeQuantWithMinMaxVars': + weights_quant_found = True + self.assertEqual(op.get_attr('num_bits'), weight_bits) + self.assertTrue(weights_quant_found) + + def testActivationBits(self): + self._RunTestOverExperimentalRewrites(self._TestActivationBits) + + def _TestActivationBits(self, rewrite_fn): + with ops.Graph().as_default() as g: + self._ConvLayer() + activation_bits = 4 + rewrite_fn(activation_bits=activation_bits) + + act_quant_found = False + for op in g.get_operations(): + # Check to see if FakeQuant operations for activations have the right bits + # set. + act_quant_names = ['act_quant', 'conv_quant', 'add_quant'] + if any(s in op.name + for s in act_quant_names) and op.type == 'FakeQuantWithMinMaxVars': + act_quant_found = True + self.assertEqual(op.get_attr('num_bits'), activation_bits) + self.assertTrue(act_quant_found) + + def _ConvLayer(self): + """Add a basic convolution layer to the default graph.""" + batch_size, height, width, depth = 5, 128, 128, 3 + inputs = array_ops.zeros((batch_size, height, width, depth)) + weight_init = init_ops.truncated_normal_initializer + conv = layers.conv2d( + inputs, + 32, [5, 5], + stride=2, + padding='SAME', + weights_initializer=weight_init(0.09), + activation_fn=None, + scope='test') + _ = nn_ops.relu6(conv) if __name__ == '__main__': -- GitLab From 9d8994ad0f8ba11911ab08d6d755abec40cfb84f Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 9 Feb 2018 14:37:24 -0800 Subject: [PATCH 093/629] Remove header dependence on cuda_config.h to fix opensource custom op support. Fixes #14454 Fixes #12860 PiperOrigin-RevId: 185194924 --- tensorflow/core/common_runtime/gpu/gpu_device.cc | 4 ++++ tensorflow/stream_executor/dso_loader.cc | 4 ++++ tensorflow/stream_executor/dso_loader.h | 4 ---- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 80a5bdbfff..dc92da7738 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -65,6 +65,10 @@ limitations under the License. #include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/stream_executor_util.h" +#if !defined(PLATFORM_GOOGLE) +#include "cuda/cuda_config.h" +#endif + namespace tensorflow { // Eigen Ops directly allocate memory only for temporary buffers used diff --git a/tensorflow/stream_executor/dso_loader.cc b/tensorflow/stream_executor/dso_loader.cc index d71938634d..d9fe301b8f 100644 --- a/tensorflow/stream_executor/dso_loader.cc +++ b/tensorflow/stream_executor/dso_loader.cc @@ -33,6 +33,10 @@ limitations under the License. #include "tensorflow/stream_executor/platform/logging.h" #include "tensorflow/stream_executor/platform/port.h" +#if !defined(PLATFORM_GOOGLE) +#include "cuda/cuda_config.h" +#endif + namespace perftools { namespace gputools { namespace internal { diff --git a/tensorflow/stream_executor/dso_loader.h b/tensorflow/stream_executor/dso_loader.h index 9495f7253a..354c7b50b8 100644 --- a/tensorflow/stream_executor/dso_loader.h +++ b/tensorflow/stream_executor/dso_loader.h @@ -28,10 +28,6 @@ limitations under the License. #include "tensorflow/stream_executor/platform.h" #include "tensorflow/stream_executor/platform/mutex.h" -#if !defined(PLATFORM_GOOGLE) -#include "cuda/cuda_config.h" -#endif - namespace perftools { namespace gputools { namespace internal { -- GitLab From 6898a5689083fb9e78d02a0d3a66595f8c41729f Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Mon, 12 Feb 2018 14:37:41 -0800 Subject: [PATCH 094/629] Rename op name in comments to reflect renamed op names. NFC. PiperOrigin-RevId: 185437550 --- tensorflow/python/kernel_tests/stack_op_test.py | 6 +++--- tensorflow/python/kernel_tests/unstack_op_test.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/kernel_tests/stack_op_test.py b/tensorflow/python/kernel_tests/stack_op_test.py index 347baf8114..2f27d1839b 100644 --- a/tensorflow/python/kernel_tests/stack_op_test.py +++ b/tensorflow/python/kernel_tests/stack_op_test.py @@ -50,7 +50,7 @@ class StackOpTest(test.TestCase): # Convert [data[0], data[1], ...] separately to tensorflow # TODO(irving): Remove list() once we handle maps correctly xs = list(map(constant_op.constant, data)) - # Pack back into a single tensorflow tensor + # Stack back into a single tensorflow tensor c = array_ops.stack(xs) self.assertAllEqual(c.eval(), data) @@ -78,7 +78,7 @@ class StackOpTest(test.TestCase): for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2): for dtype in [np.bool, np.float32, np.int32, np.int64]: data = np.random.randn(*shape).astype(dtype) - # Pack back into a single tensorflow tensor directly using np array + # Stack back into a single tensorflow tensor directly using np array c = array_ops.stack(data) # This is implemented via a Const: self.assertEqual(c.op.type, "Const") @@ -223,7 +223,7 @@ class StackOpTest(test.TestCase): array_ops.stack(t, axis=-3) -class AutomaticPackingTest(test.TestCase): +class AutomaticStackingTest(test.TestCase): def testSimple(self): with self.test_session(use_gpu=True): diff --git a/tensorflow/python/kernel_tests/unstack_op_test.py b/tensorflow/python/kernel_tests/unstack_op_test.py index 8481875576..1ee6e0866a 100644 --- a/tensorflow/python/kernel_tests/unstack_op_test.py +++ b/tensorflow/python/kernel_tests/unstack_op_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Functional tests for Unpack Op.""" +"""Functional tests for Unstack Op.""" from __future__ import absolute_import from __future__ import division @@ -49,7 +49,7 @@ class UnstackOpTest(test.TestCase): data = np.random.randn(*shape).astype(dtype) # Convert data to a single tensorflow tensor x = constant_op.constant(data) - # Unpack into a list of tensors + # Unstack into a list of tensors cs = array_ops.unstack(x, num=shape[0]) self.assertEqual(type(cs), list) self.assertEqual(len(cs), shape[0]) @@ -66,7 +66,7 @@ class UnstackOpTest(test.TestCase): data = np.random.randn(*shape).astype(dtype) # Convert data to a single tensorflow tensor x = constant_op.constant(data) - # Unpack into a list of tensors + # Unstack into a list of tensors cs = array_ops.unstack(x, num=shape[0]) self.assertEqual(type(cs), list) self.assertEqual(len(cs), shape[0]) -- GitLab From 5760acee1e214865870b835000285c92820ca879 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Feb 2018 14:38:37 -0800 Subject: [PATCH 095/629] Add a caveat that pixel value range might not be preserved by ResizeArea. PiperOrigin-RevId: 185437687 --- tensorflow/core/api_def/base_api/api_def_ResizeArea.pbtxt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/core/api_def/base_api/api_def_ResizeArea.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResizeArea.pbtxt index 317ad263cc..3730ef5ef9 100644 --- a/tensorflow/core/api_def/base_api/api_def_ResizeArea.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ResizeArea.pbtxt @@ -31,6 +31,11 @@ END description: < Date: Mon, 12 Feb 2018 14:52:47 -0800 Subject: [PATCH 096/629] Add an option to tf_gen_op_wrapper_py to make it able to run the genrule locally. PiperOrigin-RevId: 185439892 --- tensorflow/tensorflow.bzl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 411b393b0a..82142fa21d 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -498,6 +498,9 @@ def tf_gen_op_wrappers_cc(name, # is invalid to specify both "hidden" and "op_whitelist". # cc_linkopts: Optional linkopts to be added to tf_cc_binary that contains the # specified ops. +# gen_locally: if True, the genrule to generate the Python library will be run +# without sandboxing. This would help when the genrule depends on symlinks +# which may not be supported in the sandbox. def tf_gen_op_wrapper_py(name, out=None, hidden=None, @@ -508,7 +511,8 @@ def tf_gen_op_wrapper_py(name, generated_target_name=None, op_whitelist=[], cc_linkopts=[], - api_def_srcs=[]): + api_def_srcs=[], + gen_locally=False): if (hidden or hidden_file) and op_whitelist: fail('Cannot pass specify both hidden and op_whitelist.') @@ -563,6 +567,7 @@ def tf_gen_op_wrapper_py(name, outs=[out], srcs=api_def_srcs + [hidden_file], tools=[tool_name] + tf_binary_additional_srcs(), + local = (1 if gen_locally else 0), cmd=("$(location " + tool_name + ") " + api_def_args_str + " @$(location " + hidden_file + ") " + ("1" if require_shape_functions else "0") + " > $@")) @@ -572,6 +577,7 @@ def tf_gen_op_wrapper_py(name, outs=[out], srcs=api_def_srcs, tools=[tool_name] + tf_binary_additional_srcs(), + local = (1 if gen_locally else 0), cmd=("$(location " + tool_name + ") " + api_def_args_str + " " + op_list_arg + " " + ("1" if require_shape_functions else "0") + " " + -- GitLab From f686d94f9043a13f258249afdb47b5b88f918671 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Mon, 12 Feb 2018 15:12:31 -0800 Subject: [PATCH 097/629] Avoid setting `ConfigProto.cluster_def` when `run_config.cluster_def` is not set. PiperOrigin-RevId: 185443115 --- tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py index 5dcbda714b..ea1a82959c 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py @@ -140,7 +140,7 @@ def _obtain_topology(master_address, run_config): def _get_session_config_with_timeout(timeout_in_secs, run_config): cluster_def = None - if run_config.session_config and run_config.session_config.cluster_def: + if run_config.session_config and run_config.session_config.cluster_def.job: cluster_def = run_config.session_config.cluster_def config = config_pb2.ConfigProto( -- GitLab From 4a1bcf7c4193906a4e77c1726657429e37579a65 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 12 Feb 2018 16:20:07 -0800 Subject: [PATCH 098/629] Automated g4 rollback of changelist 185420228 PiperOrigin-RevId: 185453293 --- tensorflow/contrib/quantize/python/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/contrib/quantize/python/common.py b/tensorflow/contrib/quantize/python/common.py index 9e76549ba5..3a1fa61e43 100644 --- a/tensorflow/contrib/quantize/python/common.py +++ b/tensorflow/contrib/quantize/python/common.py @@ -114,9 +114,7 @@ def CreateOrGetQuantizationStep(): dtype=dtypes.int64, initializer=init_ops.zeros_initializer(), trainable=False, - collections=[ - ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.MODEL_VARIABLES - ]) + collections=[ops.GraphKeys.GLOBAL_VARIABLES]) with g.name_scope(quantization_step_tensor.op.name + '/'): # We return the incremented variable tensor. Since this is used in conds # for quant_delay and freeze_bn_delay, it will run once per graph -- GitLab From 96564330fb0508a50a0515be11c9202c64b0f5b7 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Mon, 12 Feb 2018 16:24:45 -0800 Subject: [PATCH 099/629] Support None trainable variables that don't produce a gradient in replicate_model_fn. This fixes #16829. PiperOrigin-RevId: 185453911 --- .../estimator/python/estimator/replicate_model_fn.py | 2 +- .../python/estimator/replicate_model_fn_test.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py index dfae034afc..7134cd3f5a 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn.py @@ -790,7 +790,7 @@ def _extract_tensors(tensors_and_vars): tensor, _ = tensor_and_var if isinstance(tensor, ops_lib.IndexedSlices): tensors.append(tensor.values) - else: + elif tensor is not None: tensors.append(tensor) return tensors diff --git a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py index ab117e61a7..d46a18aacf 100644 --- a/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py +++ b/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py @@ -240,6 +240,13 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): labels = np.array([[1.0], [2.0]]) with self.test_session() as session: + # Add another trainable variable that doesn't produce a gradient to + # verify that None gradients are supported. + _ = variable_scope.get_variable( + 'another_variable', + initializer=constant_op.constant(1, dtype=dtypes.float64), + dtype=dtypes.float64) + replicated_model_fn = replicate_model_fn.replicate_model_fn( self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1']) estimator_spec = replicated_model_fn( @@ -1119,8 +1126,6 @@ class SplitBatchTest(test_util.TensorFlowTestCase): feature_shards, label_shards = replicate_model_fn._split_batch( features, labels, 2, device='/gpu:0') - print(feature_shards[0]['x'].eval()) - print(feature_shards[1]['x'].eval()) self.assertSparseValuesEqual( sparse_tensor.SparseTensorValue( indices=[[0, 0], [1, 0], [1, 1]], -- GitLab From 929e3ee91ecf7f9685b50fa1681f39d9b25e568b Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Mon, 12 Feb 2018 16:56:28 -0800 Subject: [PATCH 100/629] [XLA:GPU] Extend the CustomCall for cudnn convolutions to represent tensor_ops_enabled. The convolution algorithms returned from the stream executor have a flag for whether tensor_ops is enabled. This flag is used when running each algorithm during auto-tunning. However, this flag is not currently represented in the CustomCall representing the auto-tune result. As a result, the algorithm may be run differently after auto-tune. This change adds a constant to the CustomCall for cudnn convolution algorithm selected by auto-tune, to represent whether tensor_ops is enabled during auto-tune. This information is used by convolution thunk to ensure that the algorithm is run with the same flag after auto-tune. PiperOrigin-RevId: 185458497 --- .../xla/service/gpu/convolution_thunk.cc | 7 +++-- .../xla/service/gpu/convolution_thunk.h | 3 +- .../gpu/cudnn_convolution_algorithm_picker.cc | 31 ++++++++++++------- .../gpu/cudnn_convolution_algorithm_picker.h | 2 +- .../service/gpu/cudnn_convolution_runner.cc | 3 ++ .../xla/service/gpu/gpu_copy_insertion.cc | 6 ++-- .../xla/service/gpu/ir_emission_utils.h | 9 +++--- .../xla/service/gpu/ir_emitter_unnested.cc | 11 +++++-- 8 files changed, 46 insertions(+), 26 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index f76f15929d..15bba49b73 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -45,7 +45,7 @@ ConvolutionThunk::ConvolutionThunk( const BufferAllocation::Slice& scratch_buffer, const Shape& input_shape, const Shape& filter_shape, const Shape& output_shape, const Window& window, const ConvolutionDimensionNumbers& dim_nums, int64 algorithm, - const HloInstruction* hlo) + bool tensor_ops_enabled, const HloInstruction* hlo) : Thunk(Kind::kConvolution, hlo), convolution_kind_(convolution_kind), input_buffer_(input_buffer), @@ -58,7 +58,8 @@ ConvolutionThunk::ConvolutionThunk( output_shape_(output_shape), window_(window), dim_nums_(dim_nums), - algorithm_(algorithm) {} + algorithm_(algorithm), + tensor_ops_enabled_(tensor_ops_enabled) {} Status ConvolutionThunk::ExecuteOnStream( const BufferAllocations& buffer_allocations, se::Stream* stream) { @@ -72,7 +73,7 @@ Status ConvolutionThunk::ExecuteOnStream( buffer_allocations.GetDeviceAddress(scratch_buffer_); se::dnn::AlgorithmConfig algorithm_config( - se::dnn::AlgorithmDesc(algorithm_, /*use_tensor_ops=*/false)); + se::dnn::AlgorithmDesc(algorithm_, tensor_ops_enabled_)); TF_RETURN_IF_ERROR(RunCudnnConvolution( convolution_kind_, input_shape_, filter_shape_, output_shape_, input_data, diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h index ca9ef5277b..900d9cb624 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h @@ -59,7 +59,7 @@ class ConvolutionThunk : public Thunk { const Shape& input_shape, const Shape& filter_shape, const Shape& output_shape, const Window& window, const ConvolutionDimensionNumbers& dim_nums, int64 algorithm, - const HloInstruction* hlo); + bool tensor_ops_enabled, const HloInstruction* hlo); ConvolutionThunk(const ConvolutionThunk&) = delete; ConvolutionThunk& operator=(const ConvolutionThunk&) = delete; @@ -99,6 +99,7 @@ class ConvolutionThunk : public Thunk { const Window window_; const ConvolutionDimensionNumbers dim_nums_; int64 algorithm_; + bool tensor_ops_enabled_; }; } // namespace gpu diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc index 621b2d510f..c29aa31d4e 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc @@ -172,7 +172,7 @@ string NumBytesToString(int64 bytes) { // cache misses and doing extra work. Overall, caching doesn't seem worth the // trouble, but we may want to revisit this if we ever find a model where // caching would speed up compilation a lot. -optional> +optional> CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, const Shape& output_shape, const Window& window, @@ -260,8 +260,9 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( << AlgorithmToString(best_result.algorithm()) << ", takes " << best_result.elapsed_time_in_ms() << "ms, and uses " << best_result_bytes_used << "B of scratch memory."; - return std::make_pair(best_result.algorithm().algo_id(), - best_result_bytes_used); + return std::make_tuple(best_result.algorithm().algo_id(), + best_result.algorithm().tensor_ops_enabled(), + best_result_bytes_used); } LOG(WARNING) << "All algorithms tried for convolution " << instr->ToString() @@ -277,19 +278,19 @@ StatusOr CudnnConvolutionAlgorithmPicker::RunOnInstruction( const auto& lhs_shape = instr->operand(0)->shape(); const auto& rhs_shape = instr->operand(1)->shape(); const auto& conv_result_shape = instr->shape().tuple_shapes(0); - optional> alg_and_scratch_bytes; + optional> alg_scratch_and_tc; if (call_target == kCudnnConvForwardCallTarget) { - alg_and_scratch_bytes = PickBestAlgorithm( + alg_scratch_and_tc = PickBestAlgorithm( CudnnConvKind::kForward, /*input_shape=*/lhs_shape, /*filter_shape=*/rhs_shape, /*output_shape=*/conv_result_shape, instr->window(), instr->convolution_dimension_numbers(), instr); } else if (call_target == kCudnnConvBackwardInputCallTarget) { - alg_and_scratch_bytes = PickBestAlgorithm( + alg_scratch_and_tc = PickBestAlgorithm( CudnnConvKind::kBackwardInput, /*input_shape=*/conv_result_shape, /*filter_shape=*/rhs_shape, /*output_shape=*/lhs_shape, instr->window(), instr->convolution_dimension_numbers(), instr); } else if (call_target == kCudnnConvBackwardFilterCallTarget) { - alg_and_scratch_bytes = PickBestAlgorithm( + alg_scratch_and_tc = PickBestAlgorithm( CudnnConvKind::kBackwardFilter, /*input_shape=*/lhs_shape, /*filter_shape=*/conv_result_shape, /*output_shape=*/rhs_shape, instr->window(), instr->convolution_dimension_numbers(), instr); @@ -298,17 +299,20 @@ StatusOr CudnnConvolutionAlgorithmPicker::RunOnInstruction( << instr->ToString(); } - if (!alg_and_scratch_bytes.has_value()) { + if (!alg_scratch_and_tc.has_value()) { return false; } int64 algorithm; + bool tensor_ops_enabled; int64 scratch_bytes; - std::tie(algorithm, scratch_bytes) = *alg_and_scratch_bytes; + + std::tie(algorithm, tensor_ops_enabled, scratch_bytes) = *alg_scratch_and_tc; VLOG(1) << "Setting cudnn conv to use algorithm " << algorithm << " and " << NumBytesToString(scratch_bytes) - << " of scratch memory: " << instr->ToString(); + << " of scratch memory: " << instr->ToString() + << " tensor_ops_enabled: " << tensor_ops_enabled; // Replace instr with a new CustomCall which has the correct algorithm, and // whose output shape has the appropriate amount of scratch memory. @@ -318,10 +322,15 @@ StatusOr CudnnConvolutionAlgorithmPicker::RunOnInstruction( ShapeUtil::MakeShape(U8, {scratch_bytes})}); HloInstruction* algorithm_hlo = computation->AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(algorithm))); + HloInstruction* tensor_ops_enabled_hlo = + computation->AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR0(tensor_ops_enabled))); + HloInstruction* new_call = computation->AddInstruction(HloInstruction::CreateCustomCall( new_call_shape, - {instr->mutable_operand(0), instr->mutable_operand(1), algorithm_hlo}, + {instr->mutable_operand(0), instr->mutable_operand(1), algorithm_hlo, + tensor_ops_enabled_hlo}, instr->custom_call_target())); new_call->set_window(instr->window()); new_call->set_convolution_dimension_numbers( diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h index 10e49daee5..516210ec2e 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h @@ -47,7 +47,7 @@ class CudnnConvolutionAlgorithmPicker : public HloPassInterface { private: StatusOr RunOnComputation(HloComputation* computation); StatusOr RunOnInstruction(HloInstruction* instr); - tensorflow::gtl::optional> PickBestAlgorithm( + tensorflow::gtl::optional> PickBestAlgorithm( CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, const Shape& output_shape, const Window& window, const ConvolutionDimensionNumbers& dnums, HloInstruction* instr); diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc index f5f52cf62b..81695a6c32 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc @@ -106,6 +106,9 @@ Status RunCudnnConvolution( se::ScratchAllocator* scratch_allocator, const Window& window, const ConvolutionDimensionNumbers& dnums, AlgorithmConfig algorithm, Stream* stream, ProfileResult* profile_result /*= nullptr*/) { + VLOG(3) << "Convolution Algorithm: " << algorithm.algorithm().algo_id(); + VLOG(3) << "tensor_ops_enabled: " + << algorithm.algorithm().tensor_ops_enabled(); VLOG(3) << "Convolution kind: " << CudnnConvKindToString(kind); VLOG(3) << "input shape: { " << ShapeUtil::HumanString(input_shape) << " }"; VLOG(3) << "filter shape: { " << ShapeUtil::HumanString(filter_shape) << " }"; diff --git a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc index 88bf5a74fa..916b556fd4 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.cc @@ -79,9 +79,9 @@ StatusOr GpuCopyInsertion::Run(HloModule* module) { TF_RETURN_IF_ERROR(copy_operand_if_constant(i)); } } else if (IsCustomCallToDnnConvolution(*hlo)) { - // The last argument to a CUDNN convolution is its algorithm, which must - // be an HLO constant -- it shouldn't be copied. - for (int64 i = 0; i < hlo->operand_count() - 1; ++i) { + // The last two arguments to a CUDNN convolution are two HLO constants for + // cudnn algorithm and tensor_ops_enabled flag, which shouldn't be copied. + for (int64 i = 0; i < hlo->operand_count() - 2; ++i) { TF_RETURN_IF_ERROR(copy_operand_if_constant(i)); } } else if (ImplementedAsLibraryCall(*hlo)) { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h index 7ad9680bfb..59455f389e 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h @@ -63,10 +63,11 @@ bool IsCustomCallToDnnBatchNorm(const HloInstruction& hlo); // strings. // // These CustomCalls have window() and convolution_dimension_numbers() set like -// regular convolution ops. They have the same LHS and RHS operands, plus one -// additional int64 operand, representing which cudnn algorithm to run. This -// operand must be an HLO constant. A value of -1 means that the implementation -// is free to choose the best algorithm it can. +// regular convolution ops. They have the same LHS and RHS operands, plus two +// additional constant operands: an int64 operand for the cudnn algorithm and +// a bool operand for whether tensor_ops is enabled. A value of -1 for the cudnn +// algorithm means that the implementation is free to choose the best algorithm +// it can. // // These calls output a tuple (conv_result, scratch_memory), where conv_result // is the actual result of the convolution, and scratch_memory is temporary diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index c81dfbf6c2..7e20af3291 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -393,6 +393,11 @@ Status IrEmitterUnnested::HandleCustomCall(HloInstruction* custom_call) { CHECK(algorithm_inst->IsConstant()) << algorithm_inst->ToString(); int64 algorithm = algorithm_inst->literal().Get({}); + const HloInstruction* tensor_ops_enabled_inst = custom_call->operand(3); + CHECK(tensor_ops_enabled_inst->IsConstant()) + << tensor_ops_enabled_inst->ToString(); + bool tensor_ops_enabled = tensor_ops_enabled_inst->literal().Get({}); + const auto& target = custom_call->custom_call_target(); std::unique_ptr thunk; if (target == kCudnnConvForwardCallTarget) { @@ -407,7 +412,7 @@ Status IrEmitterUnnested::HandleCustomCall(HloInstruction* custom_call) { /*filter_shape=*/rhs_shape, /*output_shape=*/conv_result_shape, // custom_call->window(), custom_call->convolution_dimension_numbers(), - algorithm, custom_call); + algorithm, tensor_ops_enabled, custom_call); } else if (target == kCudnnConvBackwardInputCallTarget) { thunk = MakeUnique( CudnnConvKind::kBackwardInput, @@ -420,7 +425,7 @@ Status IrEmitterUnnested::HandleCustomCall(HloInstruction* custom_call) { /*filter_shape=*/rhs_shape, /*output_shape=*/lhs_shape, // custom_call->window(), custom_call->convolution_dimension_numbers(), - algorithm, custom_call); + algorithm, tensor_ops_enabled, custom_call); } else if (target == kCudnnConvBackwardFilterCallTarget) { thunk = MakeUnique( CudnnConvKind::kBackwardFilter, @@ -433,7 +438,7 @@ Status IrEmitterUnnested::HandleCustomCall(HloInstruction* custom_call) { /*filter_shape=*/conv_result_shape, /*output_shape=*/rhs_shape, // custom_call->window(), custom_call->convolution_dimension_numbers(), - algorithm, custom_call); + algorithm, tensor_ops_enabled, custom_call); } else { LOG(FATAL) << "Unexpected custom call target: " << custom_call->custom_call_target(); -- GitLab From 8bd6410362903de7a10f8fa048d03b30586cf713 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Feb 2018 17:07:27 -0800 Subject: [PATCH 101/629] Fix a typo in the comments. PiperOrigin-RevId: 185459972 --- tensorflow/core/example/feature_util.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/example/feature_util.h b/tensorflow/core/example/feature_util.h index 4e9352ee32..d977935b8a 100644 --- a/tensorflow/core/example/feature_util.h +++ b/tensorflow/core/example/feature_util.h @@ -56,9 +56,9 @@ limitations under the License. // // To add values to feature_lists: // AppendFeatureValues({4.0}, -// GetFeatureList("movie_ratings", &se)->Add()); +// GetFeatureList("images", &se)->Add()); // AppendFeatureValues({5.0, 3.0}, -// GetFeatureList("movie_ratings", &se)->Add()); +// GetFeatureList("images", &se)->Add()); // This will create a feature list keyed as "images" with two features: // feature_lists { // feature_list { -- GitLab From 4b9ef6c8e07dea7d18f552fa4955c3176646f95d Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Mon, 12 Feb 2018 17:15:51 -0800 Subject: [PATCH 102/629] Rollforward switch group identification with fixes. Fixed computing the switch depth: with the erroneous switch depth incorrect clusters could be formed. Change the way the switch depth is determined (the switch depth is now on the output side, so a switch always has a switch depth one higher than all its inputs), add further checking during execution. PiperOrigin-RevId: 185461054 --- .../tf2xla/functionalize_control_flow.cc | 334 ++++++++++++++---- .../tf2xla/functionalize_control_flow_test.cc | 10 +- tensorflow/compiler/tf2xla/graph_compiler.cc | 2 +- 3 files changed, 263 insertions(+), 83 deletions(-) diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc index bf304102ed..f8169795dd 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc @@ -285,7 +285,8 @@ Status BuildLoopBody(const Graph& graph, Frame* frame, Status FunctionalizeLoop(Graph* graph, Frame* frame, FunctionLibraryDefinition* library) { VLOG(2) << "Frame " << frame->name << " before: " - << dump_graph::DumpGraphToFile("functionalize_before", *graph); + << dump_graph::DumpGraphToFile("functionalize_before", *graph, + library); // Split loop-varying Enter nodes with multiple successors. If the same // Tensor is fed as input to multiple loop arguments, we may end up with a @@ -470,7 +471,7 @@ Status FunctionalizeLoop(Graph* graph, Frame* frame, TF_RETURN_IF_ERROR(BuildLoopBody(*graph, frame, &arg_types, &body_graph)); VLOG(2) << "Frame " << frame->name << " condition: " - << dump_graph::DumpGraphToFile("loop_condition", *cond_graph) + << dump_graph::DumpGraphToFile("loop_condition", *cond_graph, library) << " body: " << dump_graph::DumpGraphToFile("loop_body", *body_graph); static std::atomic sequence_num(0LL); @@ -551,7 +552,8 @@ Status FunctionalizeLoop(Graph* graph, Frame* frame, frame->parent->nodes.insert(while_node); VLOG(2) << "Frame " << frame->name << " after: " - << dump_graph::DumpGraphToFile("functionalize_after", *graph); + << dump_graph::DumpGraphToFile("functionalize_after", *graph, + library); return Status::OK(); } @@ -584,11 +586,11 @@ class FunctionalizeCond { explicit CondArgNode(Node* input) : input(input) {} string ToString() const { return strings::StrCat("input=", input->name(), - " switches=", NodesToString(switch_nodes)); + " switches=", NodesToString(switches)); } Node* input; - std::vector switch_nodes; + std::vector switches; }; using CondArgNodes = std::vector; @@ -602,15 +604,22 @@ class FunctionalizeCond { int count; }; - struct PredicateSwitches { - explicit PredicateSwitches(Node* predicate) : predicate(predicate) {} + // Group of switch nodes that will be part of the same XlaIf. + struct SwitchCluster { + explicit SwitchCluster(Node* predicate) : predicate(predicate) {} + string ToString() const { + return strings::StrCat(name, " predicate=", predicate->name(), + " switches=", NodesToString(switches)); + } + string name; Node* predicate; std::vector switches; }; - FunctionalizeCond(Graph* graph, FunctionLibraryDefinition* library) - : library_(library), graph_(graph) {} + FunctionalizeCond(Graph* graph, FunctionLibraryDefinition* library, + bool dump_graphs) + : library_(library), graph_(graph), dump_graphs_(dump_graphs) {} // Perform the actual cond functionalization. Iterate over groups of switch // nodes (linked by common predicate), from innermost to outermost, and @@ -621,27 +630,25 @@ class FunctionalizeCond { // frontier (the nodes where the cond ends). StatusOr, std::unordered_set>> - DetermineBranchMapAndFrontier(const std::vector& switches); + DetermineBranchMapAndFrontier(const SwitchCluster& switch_cluster); // Returns XlaIf node created from subgraph of merge and switch nodes. This // encapsulates the process of extracting the bodies needed for the then and // else branch, creates a XlaIf node, removing the nodes of the branches from // the graph and replacing the merge node with a XlaIf. StatusOr ConvertToXlaIf(const CondArgNodes& cond_arg_nodes, - const std::vector& switch_nodes, - const std::vector& merge_nodes, - Node* predicate); + const SwitchCluster& switch_cluster, + const std::vector& switches); // Builds a XlaIfOp to replace the Switch-Graph-Merge cluster with. StatusOr BuildAndAddXlaIfOp(const CondArgNodes& cond_arg_nodes, - const std::vector& switch_nodes, - const std::vector& merge_nodes, - Node* predicate); + const SwitchCluster& switch_cluster, + const std::vector& merge_nodes); // Extracts a function body corresponding to the given input edge of the merge // node. Status ExtractBody(const CondArgNodes& cond_arg_nodes, - const std::vector& switch_nodes, + const std::vector& switches, const std::vector& merge_nodes, int input_edge, Graph* body); @@ -652,9 +659,9 @@ class FunctionalizeCond { // Adds all output edges from the `if_node`. Status AddOutputEdges(const std::vector& outputs, Node* if_node); - // Returns the switches of graph_ (along with grouping predicates) in - // postorder. Dead switch nodes are skipped and removed from the graph. - std::vector DeterminePredicateSwitchOrder(); + // Returns the switch clusters of graph_ in postorder. Dead switch nodes are + // skipped and removed from the graph. + StatusOr> DeterminePredicateSwitchOrder(); // Update the state for destination based on the state of source and the node // being updated. @@ -677,6 +684,7 @@ class FunctionalizeCond { FunctionLibraryDefinition* library_; Graph* graph_; + bool dump_graphs_; }; bool IsDeadSwitch(const Node* node) { @@ -724,10 +732,13 @@ Status FunctionalizeCond::ValidateFrontier( ") in both Else and Then branch should be in Both."); } } - if (pending[kBoth].empty() && pending[kThenBranch].empty() && - pending[kElseBranch].empty()) { - return errors::Internal("Unexpected empty frontier for switch nodes"); - } + // An empty frontier indicates a dead switch. Above we attempt to remove dead + // switch nodes, but not all are removed so don't treat it as an error yet. + // TODO(jpienaar): Find out why dead switch nodes remain. + // if (pending[kBoth].empty() && pending[kThenBranch].empty() && + // pending[kElseBranch].empty()) { + // return errors::Internal("Unexpected empty frontier for switch nodes"); + // } return Status::OK(); } @@ -754,33 +765,191 @@ Status FunctionalizeCond::Join(const ForwardFlowNode& src_state, return Status::OK(); } -std::vector +StatusOr> FunctionalizeCond::DeterminePredicateSwitchOrder() { + struct Cluster { + bool operator==(const Cluster& other) const { + return representative == other.representative; + } + int representative = -1; + }; + + // Perform a DFS over the graph and + // * Determine the reverse topological order of the nodes (there should be no + // cycles at this point so the post-order numbering corresponds to the + // reverse topological sorting); + // * Identify dead switches; + // * Initialize the cluster's representative; + std::vector> clusters(graph_->num_node_ids()); std::vector dead_switches; std::vector switch_order; - DFS(*graph_, nullptr, [this, &dead_switches, &switch_order](Node* n) { + std::vector rev_topo_sorted_nodes; + DFS(*graph_, nullptr, [&](Node* n) { + clusters[n->id()].Get().representative = n->id(); if (IsSwitch(n)) { if (IsDeadSwitch(n)) { dead_switches.push_back(n); } else { + rev_topo_sorted_nodes.push_back(n); switch_order.push_back(n); } + } else if (n->IsOp()) { + // Exclude src and sink nodes from further consideration. + rev_topo_sorted_nodes.push_back(n); } }); + std::vector switch_clusters; + // Return early if there are no switches in the graph. + if (switch_order.empty()) { + return switch_clusters; + } + // Remove all dead switch nodes. for (Node* n : dead_switches) { VLOG(2) << "Removing dead switch: " << n->DebugString(); graph_->RemoveNode(n); } - std::vector predicate_switch_order; - if (switch_order.empty()) { - return predicate_switch_order; + // Identify switch nodes that are part of the same control flow context by + // considering the operands of operations: an operation is part of the same + // control context as its operands unless the operation is a switch. Control + // dependencies are considered part of the same control flow context if the + // switch depth is the same (see comment below). + + // entry_cluster records the input cluster to a switch node. This is used when + // merging with a merge node where the dst's cluster is merged with the entry + // cluster of the merge node's cluster (which corresponds to a switch cluster + // and so has an entry cluster). + std::unordered_map*> entry_cluster; + + // Returns the output cluster of a node. Where the output cluster is cluster + // where the output of the node is used. For non-merge nodes this is simply + // the cluster they are part of, while for merge nodes it is the entry cluster + // of the cluster they are part of (this will correspond to the entry node of + // a switch node that dominates the merge). + auto find_output_cluster = [&](Node* n) { + UnionFind* cluster = &clusters[n->id()]; + if (!IsMerge(n)) return cluster; + auto it = entry_cluster.find(clusters[n->id()].Get().representative); + // If the cluster is not found in the entry_cluster map then an + // instruction not dominated by a switch node has been merged into the + // cluster of the merge. This indicates a failure of the clustering. + CHECK(it != entry_cluster.end()) + << "Unable to find entry for n=" << n->id() << " (" + << cluster->Get().representative << ")"; + return it->second; + }; + + // TODO(jpienaar): This could be combined with DetermineBranchMapAndFrontier. + std::vector switch_depth(graph_->num_node_ids()); + for (auto it = rev_topo_sorted_nodes.rbegin(); + it != rev_topo_sorted_nodes.rend(); ++it) { + Node* n = *it; + + // Compute switch depth. + int new_switch_depth = 0; + for (const Edge* e : n->in_edges()) { + Node* src = e->src(); + new_switch_depth = std::max( + new_switch_depth, switch_depth[src->id()] - (IsMerge(src) ? 1 : 0)); + } + switch_depth[n->id()] = new_switch_depth + (IsSwitch(n) ? 1 : 0); + + // Only merge the input operands of a switch. The switch's clustering itself + // is determined by the interaction of the switch's outputs. + if (IsSwitch(n)) { + Node* input; + TF_CHECK_OK(n->input_node(0, &input)); + entry_cluster[n->id()] = &clusters[input->id()]; + UnionFind* cluster = find_output_cluster(input); + int cluster_depth = switch_depth[cluster->Get().representative]; + // Merge the inputs of the switch node with one another. This results in + // predicates and control input residing in the same cluster. + for (const Edge* e : n->in_edges()) { + Node* src = e->src(); + UnionFind* src_cluster = find_output_cluster(src); + int src_cluster_depth = switch_depth[src_cluster->Get().representative]; + if (cluster_depth != src_cluster_depth) { + return errors::InvalidArgument( + "Unable to functionalize control flow in graph: Switch ('", + n->name(), "') has operands ('", input->name(), "' and '", + src->name(), "') that have different switch depths (", + cluster_depth, " != ", src_cluster_depth, ")"); + } + cluster->Merge(src_cluster); + } + continue; + } + + for (const Edge* e : n->in_edges()) { + Node* src = e->src(); + if (!src->IsOp()) continue; + UnionFind* cluster = find_output_cluster(src); + // Merge a node with its data operands and with its control operands if + // the src and dst are in the same ControlContext. The ControlContext is + // not explicitly available here, and instead the switch depth is used as + // a proxy here. Due to the invariant that control edges can only be from + // a containing scope to an inner scope or from the inner scope to its + // containing scope (for exit nodes), the switch depth will only match if + // the src and dst are in the same ControlContext. Control edges between + // ControlContexts are handled during the extraction. + int src_id = cluster->Get().representative; + int src_depth = switch_depth[src_id]; + if (!e->IsControlEdge() || new_switch_depth == src_depth) { + if (src_depth != new_switch_depth) { + return errors::InvalidArgument( + "Unable to functionalize control flow in graph: Operand ('", + src->name(), "') and operator ('", n->name(), + "') have different switch depths (", src_depth, + " != ", new_switch_depth, ")"); + } + cluster->Merge(&clusters[n->id()]); + } + } } + if (dump_graphs_) { + // Mark the switch cluster each node is part of. + for (Node* n : graph_->nodes()) { + n->ClearAttr("_XlaFunctionalizeSwitchGroup"); + n->AddAttr("_XlaFunctionalizeSwitchGroup", + clusters[n->id()].Get().representative); + } + LOG(INFO) << "FunctionalizeControlFlow (with_clusters): " + << dump_graph::DumpGraphToFile("functionalize_clustered", *graph_, + library_); + } + + // Verify all the nodes of a cluster are at the same depth. + std::unordered_map> cluster_to_depth_node; + for (Node* n : graph_->nodes()) { + int depth = switch_depth[n->id()]; + int cluster_rep = clusters[n->id()].Get().representative; + auto it = cluster_to_depth_node.find(cluster_rep); + if (it == cluster_to_depth_node.end()) { + cluster_to_depth_node[cluster_rep] = std::make_pair(depth, n); + } else { + if (it->second.first != depth) { + return errors::Internal( + "Illegal clustering created, mismatch in depths:", "\n\t", + n->DebugString(), "(", clusters[n->id()].Get().representative, + ") at depth=", depth, " vs\n\t", it->second.second->DebugString(), + "(", clusters[n->id()].Get().representative, ") at depth ", + it->second.first); + } + } + } + + struct Hash { + size_t operator()(const std::pair& item) const { + return Hash64Combine(hash()(item.first), + std::hash()(item.second.representative)); + } + }; + // Merge Switch nodes with common predicate. - std::unordered_map predicate_index; + std::unordered_map, int, Hash> predicate_index; // The nodes in switch_order are in reverse topological order, but the // clustered switches need not be (i.e., when considered as a cluster one // element of a cluster may be later in the topological order than another @@ -789,13 +958,19 @@ FunctionalizeCond::DeterminePredicateSwitchOrder() { for (auto it = switch_order.rbegin(); it != switch_order.rend(); ++it) { Node* pred; TF_CHECK_OK((*it)->input_node(1, &pred)); - if (predicate_index.find(pred) == predicate_index.end()) { - predicate_index[pred] = predicate_switch_order.size(); - predicate_switch_order.emplace_back(pred); + auto repr = std::make_pair(pred, clusters[(*it)->id()].Get()); + if (predicate_index.find(repr) == predicate_index.end()) { + predicate_index[repr] = switch_clusters.size(); + switch_clusters.emplace_back(pred); + // Generate a name by concatenating with the cluster representative as + // there could be multiple switch clusters with the same predicate. + switch_clusters[predicate_index[repr]].name = + strings::StrCat(pred->name(), "_", repr.second.representative, "_If"); } - predicate_switch_order[predicate_index[pred]].switches.push_back(*it); + switch_clusters[predicate_index[repr]].switches.push_back(*it); } - return predicate_switch_order; + + return switch_clusters; } StatusOr> @@ -843,10 +1018,10 @@ StatusOr< std::pair, std::unordered_set>> FunctionalizeCond::DetermineBranchMapAndFrontier( - const std::vector& switches) { + const SwitchCluster& switch_cluster) { std::unordered_map branch_map; std::unordered_set frontier; - std::vector stack = switches; + std::vector stack = switch_cluster.switches; std::vector visited(graph_->num_node_ids(), false); while (!stack.empty()) { Node* n = stack.back(); @@ -888,7 +1063,7 @@ FunctionalizeCond::DetermineBranchMapAndFrontier( } } - if (VLOG_IS_ON(2)) { + if (dump_graphs_) { for (const auto& kv : branch_map) { // Append attribute to the graph if running with logging to make the // changes clearer in the visualization. @@ -900,8 +1075,8 @@ FunctionalizeCond::DetermineBranchMapAndFrontier( } Status FunctionalizeCond::FunctionalizeInternal() { - std::vector predicate_switch_order = - DeterminePredicateSwitchOrder(); + TF_ASSIGN_OR_RETURN(std::vector predicate_switch_order, + DeterminePredicateSwitchOrder()); // Iterate from innermost set of clustered switches to outermost, replacing // matching switch->merge subgraphs with single XlaIf nodes. @@ -914,10 +1089,12 @@ Status FunctionalizeCond::FunctionalizeInternal() { std::unordered_map branch_map; std::unordered_set frontier; TF_ASSIGN_OR_RETURN(std::tie(branch_map, frontier), - DetermineBranchMapAndFrontier(ps.switches)); + DetermineBranchMapAndFrontier(ps)); - VLOG(2) << "FunctionalizeControlFlow (before XlaIf conversion): " - << dump_graph::DumpGraphToFile("functionalize_bc", *graph_); + if (dump_graphs_) + LOG(INFO) << "FunctionalizeControlFlow (before XlaIf conversion): " + << dump_graph::DumpGraphToFile("functionalize_bc", *graph_, + library_); TF_RETURN_IF_ERROR(ValidateFrontier(branch_map, frontier)); // Sort the merge and switch nodes using NodeCmp. The switch-nodes are @@ -934,7 +1111,7 @@ Status FunctionalizeCond::FunctionalizeInternal() { input_index[in] = cond_arg_nodes.size(); cond_arg_nodes.emplace_back(in); } - cond_arg_nodes.at(input_index.at(in)).switch_nodes.push_back(switch_node); + cond_arg_nodes.at(input_index.at(in)).switches.push_back(switch_node); } std::vector merge_nodes(frontier.begin(), frontier.end()); std::sort(merge_nodes.begin(), merge_nodes.end(), NodeCmp()); @@ -943,9 +1120,8 @@ Status FunctionalizeCond::FunctionalizeInternal() { EnsureDominanceAndReturnNonDominatedControlNodes( branch_map, ps.switches)); - TF_ASSIGN_OR_RETURN( - Node * if_node, - ConvertToXlaIf(cond_arg_nodes, ps.switches, merge_nodes, ps.predicate)); + TF_ASSIGN_OR_RETURN(Node * if_node, + ConvertToXlaIf(cond_arg_nodes, ps, merge_nodes)); for (Node* old : old_control_nodes) { graph_->AddControlEdge(old, if_node); } @@ -954,25 +1130,26 @@ Status FunctionalizeCond::FunctionalizeInternal() { graph_->RemoveNode(del_kv.first); } for (auto& kv : cond_arg_nodes) { - for (Node* node : kv.switch_nodes) { + for (Node* node : kv.switches) { graph_->RemoveNode(node); } } - VLOG(2) << "FunctionalizeControlFlow (after XlaIf conversion): " - << dump_graph::DumpGraphToFile("functionalize_ac", *graph_); + if (dump_graphs_) + LOG(INFO) << "FunctionalizeControlFlow (after XlaIf conversion): " + << dump_graph::DumpGraphToFile("functionalize_ac", *graph_, + library_); } return Status::OK(); } StatusOr FunctionalizeCond::BuildAndAddXlaIfOp( - const CondArgNodes& cond_arg_nodes, const std::vector& switch_nodes, - const std::vector& merge_nodes, Node* predicate) { - VLOG(2) << "Build if op for " << NodesToString(merge_nodes) << " with input " - << NodesToString(switch_nodes); + const CondArgNodes& cond_arg_nodes, const SwitchCluster& switch_cluster, + const std::vector& merge_nodes) { + VLOG(2) << "Build if op for " << switch_cluster.name; NodeDef if_def; // Create a new If node using the name of the merge node. - NodeDefBuilder builder(strings::StrCat(predicate->name(), "_If"), "XlaIf"); + NodeDefBuilder builder(switch_cluster.name, "XlaIf"); string branch[] = {"else_branch", "then_branch"}; for (int i = 0; i < 2; ++i) { static std::atomic sequence_num(0LL); @@ -982,12 +1159,9 @@ StatusOr FunctionalizeCond::BuildAndAddXlaIfOp( body_name.set_name( strings::StrCat("_functionalize_if_", branch[i], "_", id)); auto body = xla::MakeUnique(graph_->op_registry()); - TF_RETURN_IF_ERROR( - ExtractBody(cond_arg_nodes, switch_nodes, merge_nodes, i, body.get())); + TF_RETURN_IF_ERROR(ExtractBody(cond_arg_nodes, switch_cluster.switches, + merge_nodes, i, body.get())); VLOG(3) << "Body " << branch[i] << ": " << DebugString(body.get()); - VLOG(4) << "FunctionalizeControlFlow (" << branch[i] << "): " - << dump_graph::DumpGraphToFile( - strings::StrCat("functionalize_", branch[i]), *body); FunctionDef body_fdef; TF_RETURN_IF_ERROR(GraphToFunctionDef(*body, body_name.name(), &body_fdef)); TF_RETURN_IF_ERROR(library_->AddFunctionDef(body_fdef)); @@ -999,7 +1173,7 @@ StatusOr FunctionalizeCond::BuildAndAddXlaIfOp( DataTypeVector in_arg_types; for (auto& kv : cond_arg_nodes) { bool inserted = false; - for (const Node* arg : kv.switch_nodes) { + for (const Node* arg : kv.switches) { const Edge* in_edge; TF_RETURN_IF_ERROR(arg->input_edge(0, &in_edge)); if (in_edge->IsControlEdge()) { @@ -1026,10 +1200,11 @@ StatusOr FunctionalizeCond::BuildAndAddXlaIfOp( builder.Attr("Tout", out_type); builder.Attr("Tcond", DT_BOOL); - builder.Device(predicate->assigned_device_name()); + builder.Device(switch_cluster.predicate->assigned_device_name()); // Conditional should be the first input ... builder.Input( - NodeDefBuilder::NodeOut(predicate->name(), 0, predicate->output_type(0))); + NodeDefBuilder::NodeOut(switch_cluster.predicate->name(), 0, + switch_cluster.predicate->output_type(0))); // ... followed by the other inputs. builder.Input(inputs); @@ -1039,7 +1214,7 @@ StatusOr FunctionalizeCond::BuildAndAddXlaIfOp( } Status FunctionalizeCond::ExtractBody(const CondArgNodes& cond_arg_nodes, - const std::vector& switch_nodes, + const std::vector& switches, const std::vector& merge_nodes, int input_edge, Graph* body) { VLOG(2) << "ExtractBody for " << NodesToString(merge_nodes) << " along edge " @@ -1049,7 +1224,7 @@ Status FunctionalizeCond::ExtractBody(const CondArgNodes& cond_arg_nodes, int arg_count = 0; for (auto& kv : cond_arg_nodes) { Node* arg_node = nullptr; - for (const auto* arg : kv.switch_nodes) { + for (const auto* arg : kv.switches) { DataType dtype = arg->input_type(0); if (arg_node == nullptr) { TF_ASSIGN_OR_RETURN(arg_node, BuildArgNode(body, dtype, arg_count++)); @@ -1073,8 +1248,7 @@ Status FunctionalizeCond::ExtractBody(const CondArgNodes& cond_arg_nodes, node_map.at(in->id()) = body->CopyNode(in); } - if (std::find(switch_nodes.begin(), switch_nodes.end(), in) == - switch_nodes.end()) { + if (std::find(switches.begin(), switches.end(), in) == switches.end()) { body->AddEdge(node_map.at(in->id()), in_edge->src_output(), node_map.at(node->id()), 0); } else { @@ -1096,7 +1270,7 @@ Status FunctionalizeCond::AddInputEdges(const CondArgNodes& cond_arg_nodes, graph_->AddEdge(predicate, 0, if_node, index++); for (auto& kv : cond_arg_nodes) { bool inserted = false; - for (const Node* arg : kv.switch_nodes) { + for (const Node* arg : kv.switches) { const Edge* in_edge; TF_RETURN_IF_ERROR(arg->input_edge(0, &in_edge)); if (in_edge->IsControlEdge()) { @@ -1139,16 +1313,17 @@ Status FunctionalizeCond::AddOutputEdges(const std::vector& outputs, } StatusOr FunctionalizeCond::ConvertToXlaIf( - const CondArgNodes& cond_arg_nodes, const std::vector& switch_nodes, - const std::vector& merge_nodes, Node* predicate) { - VLOG(1) << "ConvertToXlaIf for " << NodesToString(switch_nodes) << " -> " + const CondArgNodes& cond_arg_nodes, const SwitchCluster& switch_cluster, + const std::vector& merge_nodes) { + VLOG(1) << "ConvertToXlaIf for " << switch_cluster.ToString() << " -> " << NodesToString(merge_nodes); // Extract bodies and builds a If operator. TF_ASSIGN_OR_RETURN( Node * if_node, - BuildAndAddXlaIfOp(cond_arg_nodes, switch_nodes, merge_nodes, predicate)); - TF_RETURN_IF_ERROR(AddInputEdges(cond_arg_nodes, predicate, if_node)); + BuildAndAddXlaIfOp(cond_arg_nodes, switch_cluster, merge_nodes)); + TF_RETURN_IF_ERROR( + AddInputEdges(cond_arg_nodes, switch_cluster.predicate, if_node)); TF_RETURN_IF_ERROR(AddOutputEdges(merge_nodes, if_node)); return if_node; @@ -1157,18 +1332,19 @@ StatusOr FunctionalizeCond::ConvertToXlaIf( Status FunctionalizeCond::Functionalize(Graph* graph, FunctionLibraryDefinition* library) { VLOG(1) << "FunctionalizeCond::Functionalize"; - FunctionalizeCond fc(graph, library); + FunctionalizeCond fc(graph, library, /*dump_graphs=*/VLOG_IS_ON(2)); return fc.FunctionalizeInternal(); } } // namespace -// Transformation that converts Tensorflow's graph control flow constructs into +// Transformation that converts TensorFlow's graph control flow constructs into // functional equivalents. Status FunctionalizeControlFlow(Graph* graph, FunctionLibraryDefinition* library) { VLOG(2) << "FunctionalizeControlFlow (initial): " - << dump_graph::DumpGraphToFile("functionalize_initial", *graph); + << dump_graph::DumpGraphToFile("functionalize_initial", *graph, + library); // Note: BuildControlFlowInfo() requires that the graph's source node is // connected to all source nodes in the graph. Many graphs violate this // invariant. @@ -1180,7 +1356,8 @@ Status FunctionalizeControlFlow(Graph* graph, for (Node* node : graph->op_nodes()) { const ControlFlowInfo& cf = cf_info[node->id()]; - VLOG(2) << "node: " << node->name() << " frame_name: " << cf.frame_name + VLOG(2) << "node: " << node->name() << " (" << node->id() + << ") frame_name: " << cf.frame_name << " frame: " << (cf.frame ? cf.frame->name() : "---") << " parent_frame: " << (cf.parent_frame ? cf.parent_frame->name() : "---"); @@ -1248,7 +1425,8 @@ Status FunctionalizeControlFlow(Graph* graph, TF_RETURN_IF_ERROR(FunctionalizeCond::Functionalize(graph, library)); VLOG(2) << "FunctionalizeControlFlow (final): " - << dump_graph::DumpGraphToFile("functionalize_final", *graph); + << dump_graph::DumpGraphToFile("functionalize_final", *graph, + library); return Status::OK(); } diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc index 71f12a1333..bc7276c3af 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow_test.cc @@ -38,10 +38,11 @@ namespace { // Returns the names of the "then" and "else" functions for the XlaIf node in a // graph. -Status FindIfThenAndElse(const GraphDef& graph, NameAttrList* then_fn, - NameAttrList* else_fn) { +Status FindIfThenAndElse(const GraphDef& graph, string* op_name, + NameAttrList* then_fn, NameAttrList* else_fn) { for (const NodeDef& node : graph.node()) { if (node.op() == "XlaIf") { + *op_name = node.name(); const NameAttrList* result; TF_RETURN_IF_ERROR(GetNodeAttr(node, "then_branch", &result)); *then_fn = *result; @@ -96,9 +97,10 @@ TEST(FunctionalizeControlFlow, Conditional) { GraphDef graph_def; graph.ToGraphDef(&graph_def); + string op_name; NameAttrList then_fn; NameAttrList else_fn; - TF_EXPECT_OK(FindIfThenAndElse(graph_def, &then_fn, &else_fn)); + TF_EXPECT_OK(FindIfThenAndElse(graph_def, &op_name, &then_fn, &else_fn)); InstantiationResultForTest else_result; TF_EXPECT_OK( InstantiateFunctionForTest(else_fn.name(), library, &else_result)); @@ -109,7 +111,7 @@ TEST(FunctionalizeControlFlow, Conditional) { auto y = ops::Placeholder(scope.WithOpName("y"), DT_INT32); auto x = ops::Placeholder(scope.WithOpName("x"), DT_INT32); auto less = ops::Less(scope.WithOpName("cond/Less"), y, x); - auto if_op = ops::XlaIf(scope.WithOpName("cond/Less_If"), less, + auto if_op = ops::XlaIf(scope.WithOpName(op_name), less, std::initializer_list{less, y, x}, then_fn, else_fn, {DT_INT32}); GraphDef expected; diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc index 1418d95956..058a1f2621 100644 --- a/tensorflow/compiler/tf2xla/graph_compiler.cc +++ b/tensorflow/compiler/tf2xla/graph_compiler.cc @@ -134,7 +134,7 @@ Status GraphCompiler::Compile() { TF_RET_CHECK(src->id() < output_registry.size()); const NodeOutputs& src_outputs = output_registry[src->id()]; - tensor_inputs_[e->dst_input()] = src_outputs[e->src_output()]; + tensor_inputs_.at(e->dst_input()) = src_outputs.at(e->src_output()); } OpKernelContext op_context(¶ms, n->num_outputs()); -- GitLab From 03cebfcc80169b867ff87f700bdfd27e28e1c7bc Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 12 Feb 2018 17:44:01 -0800 Subject: [PATCH 103/629] Enable Model subclassing, both in eager-mode and symbolic-mode. PiperOrigin-RevId: 185464334 --- tensorflow/python/keras/BUILD | 15 + .../python/keras/_impl/keras/backend.py | 65 +- .../keras/_impl/keras/engine/topology.py | 88 ++- .../keras/_impl/keras/engine/training.py | 473 ++++++++++++--- .../_impl/keras/engine/training_eager.py | 12 +- .../_impl/keras/model_subclassing_test.py | 558 ++++++++++++++++++ tensorflow/python/keras/_impl/keras/models.py | 6 +- .../keras/_impl/keras/utils/layer_utils.py | 13 +- tensorflow/python/layers/base.py | 6 +- tensorflow/python/layers/network.py | 72 ++- tensorflow/python/layers/network_test.py | 1 - .../api/golden/tensorflow.keras.-model.pbtxt | 10 +- .../golden/tensorflow.keras.-sequential.pbtxt | 8 +- .../tensorflow.keras.models.-model.pbtxt | 10 +- .../tensorflow.keras.models.-sequential.pbtxt | 8 +- 15 files changed, 1188 insertions(+), 157 deletions(-) create mode 100644 tensorflow/python/keras/_impl/keras/model_subclassing_test.py diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index fdac22bb53..d97a035256 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -3,6 +3,8 @@ licenses(["notice"]) # Apache 2.0 +exports_files(["LICENSE"]) + package(default_visibility = ["//visibility:public"]) load("//tensorflow:tensorflow.bzl", "py_test") @@ -734,6 +736,19 @@ py_test( ], ) +py_test( + name = "model_subclassing_test", + size = "medium", + srcs = ["_impl/keras/model_subclassing_test.py"], + srcs_version = "PY2AND3", + tags = ["notsan"], + deps = [ + ":keras", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) + py_test( name = "topology_test", size = "small", diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index e1126365bf..afa183b0a0 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -348,7 +348,8 @@ def learning_phase(): """ if context.in_eager_mode(): if 'eager' not in _GRAPH_LEARNING_PHASES: - raise ValueError('No learning phase set in Eager mode.') + # Fallback to inference mode as default. + return 0 return _GRAPH_LEARNING_PHASES['eager'] graph = ops.get_default_graph() @@ -2598,6 +2599,8 @@ def get_value(x): Returns: A Numpy array. """ + if context.in_eager_mode(): + return x.numpy() return x.eval(session=get_session()) @@ -2611,6 +2614,8 @@ def batch_get_value(tensors): Returns: A list of Numpy arrays. """ + if context.in_eager_mode(): + return [x.numpy() for x in tensors] if tensors: return get_session().run(tensors) else: @@ -2627,16 +2632,19 @@ def set_value(x, value): (of the same shape). """ value = np.asarray(value, dtype=dtype(x)) - tf_dtype = dtypes_module.as_dtype(x.dtype.name.split('_')[0]) - if hasattr(x, '_assign_placeholder'): - assign_placeholder = x._assign_placeholder - assign_op = x._assign_op + if context.in_eager_mode(): + x.assign(value) else: - assign_placeholder = array_ops.placeholder(tf_dtype, shape=value.shape) - assign_op = x.assign(assign_placeholder) - x._assign_placeholder = assign_placeholder - x._assign_op = assign_op - get_session().run(assign_op, feed_dict={assign_placeholder: value}) + tf_dtype = dtypes_module.as_dtype(x.dtype.name.split('_')[0]) + if hasattr(x, '_assign_placeholder'): + assign_placeholder = x._assign_placeholder + assign_op = x._assign_op + else: + assign_placeholder = array_ops.placeholder(tf_dtype, shape=value.shape) + assign_op = x.assign(assign_placeholder) + x._assign_placeholder = assign_placeholder + x._assign_op = assign_op + get_session().run(assign_op, feed_dict={assign_placeholder: value}) @tf_export('keras.backend.batch_set_value') @@ -2647,23 +2655,28 @@ def batch_set_value(tuples): tuples: a list of tuples `(tensor, value)`. `value` should be a Numpy array. """ - if tuples: - assign_ops = [] - feed_dict = {} + if context.in_eager_mode(): for x, value in tuples: - value = np.asarray(value, dtype=dtype(x)) - tf_dtype = dtypes_module.as_dtype(x.dtype.name.split('_')[0]) - if hasattr(x, '_assign_placeholder'): - assign_placeholder = x._assign_placeholder - assign_op = x._assign_op - else: - assign_placeholder = array_ops.placeholder(tf_dtype, shape=value.shape) - assign_op = x.assign(assign_placeholder) - x._assign_placeholder = assign_placeholder - x._assign_op = assign_op - assign_ops.append(assign_op) - feed_dict[assign_placeholder] = value - get_session().run(assign_ops, feed_dict=feed_dict) + x.assign(np.asarray(value, dtype=dtype(x))) + else: + if tuples: + assign_ops = [] + feed_dict = {} + for x, value in tuples: + value = np.asarray(value, dtype=dtype(x)) + tf_dtype = dtypes_module.as_dtype(x.dtype.name.split('_')[0]) + if hasattr(x, '_assign_placeholder'): + assign_placeholder = x._assign_placeholder + assign_op = x._assign_op + else: + assign_placeholder = array_ops.placeholder(tf_dtype, + shape=value.shape) + assign_op = x.assign(assign_placeholder) + x._assign_placeholder = assign_placeholder + x._assign_op = assign_op + assign_ops.append(assign_op) + feed_dict[assign_placeholder] = value + get_session().run(assign_ops, feed_dict=feed_dict) @tf_export('keras.backend.print_tensor') diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py index d1c1d2c8c4..b267fac7df 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology.py @@ -261,6 +261,10 @@ class Layer(tf_base_layers.Layer): if context.in_eager_mode(): return output + # Un-built subclassed network: build it + if isinstance(self, Network) and not self.inputs: + self._set_inputs(inputs) + # Update learning phase info. output_tensors = _to_list(output) uses_lp = any( @@ -681,10 +685,26 @@ class Network(tf_network.GraphNetwork, Layer): from_config """ - def __init__(self, inputs, outputs, name=None): + def __init__(self, *args, **kwargs): # pylint: disable=super-init-not-called + # Signature detection + if (len(args) == 2 or + len(args) == 1 and 'outputs' in kwargs or + 'inputs' in kwargs and 'outputs' in kwargs): + # Graph network + self._init_graph_network(*args, **kwargs) + else: + # Subclassed network + self._init_subclassed_network(**kwargs) + + def _init_graph_network(self, inputs, outputs, name=None): + # TODO(fchollet): merge back tf.layers.Network and tf.keras.Network + # into a single class tf.keras.Network super(Network, self).__init__(inputs, outputs, name=name) + self._is_compiled = False self.supports_masking = False + self.optimizer = None + # Fill in the output mask cache. masks = [] for x in self.inputs: @@ -719,8 +739,56 @@ class Network(tf_network.GraphNetwork, Layer): for layer in self._output_layers: self.output_names.append(layer.name) - self._internal_input_shapes = [K.int_shape(x) for x in self.inputs] - self._internal_output_shapes = [K.int_shape(x) for x in self.outputs] + def _init_subclassed_network(self, name=None): + self._init_set_name(name) + self._layers = [] + self._is_graph_network = False + self._is_compiled = False + self.outputs = None + self.inputs = None + self.trainable = True + self.supports_masking = False + self.built = False + self.optimizer = None + + # Not used, exists for compatibility purposes due to implementation of + # the base layer tf.layers.Layer - TODO(fchollet): clean up when refactoring + self._scope = None + self._reuse = None + self._dtype = None + self._graph = None + self._activity_regularizer = None + + # Used in symbolic mode only + self._updates = [] + self._losses = [] + + # Used in symbolic mode only, only in conjonction with graph-networks + self._outbound_nodes = [] + self._inbound_nodes = [] + + def __setattr__(self, name, value): + if isinstance(value, (tf_base_layers.Layer, Network)): + try: + is_graph_network = self._is_graph_network + except AttributeError: + raise RuntimeError('It looks like you are subclassing `Model` and you ' + 'forgot to call `super(YourClass, self).__init__()`.' + ' Always start with this line.') + if not is_graph_network: + if value not in self._layers: + self._layers.append(value) + super(Network, self).__setattr__(name, value) + + def add_variable(self, name, shape, dtype=None, initializer=None, + regularizer=None, trainable=True, constraint=None): + raise NotImplementedError('`add_variable` is not supported on Networks') + + def add_loss(self, *args, **kwargs): + if context.in_eager_mode(): + raise NotImplementedError('`add_loss` is not supported in eager-mode ' + 'on Networks') + super(Network, self).add_loss(*args, **kwargs) @property def uses_learning_phase(self): @@ -783,6 +851,9 @@ class Network(tf_network.GraphNetwork, Layer): K.batch_set_value(tuples) def compute_mask(self, inputs, mask): + if not self._is_graph_network: + return None + inputs = _to_list(inputs) if mask is None: masks = [None for _ in range(len(inputs))] @@ -797,6 +868,9 @@ class Network(tf_network.GraphNetwork, Layer): return output_masks def get_config(self): + if not self._is_graph_network: + raise NotImplementedError + config = { 'name': self.name, } @@ -1046,6 +1120,9 @@ class Network(tf_network.GraphNetwork, Layer): model = load_model('my_model.h5') ``` """ + if not self._is_graph_network: + raise NotImplementedError + from tensorflow.python.keras._impl.keras.models import save_model # pylint: disable=g-import-not-at-top save_model(self, filepath, overwrite, include_optimizer) @@ -1148,6 +1225,8 @@ class Network(tf_network.GraphNetwork, Layer): Returns: A JSON string. """ + if not self._is_graph_network: + raise NotImplementedError def get_json_type(obj): # If obj is any numpy type @@ -1183,6 +1262,9 @@ class Network(tf_network.GraphNetwork, Layer): Raises: ImportError: if yaml module is not found. """ + if not self._is_graph_network: + raise NotImplementedError + if yaml is None: raise ImportError('Requires yaml module installed.') return yaml.dump(self._updated_config(), **kwargs) diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 118598831d..c4b0414836 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras import callbacks as cbks from tensorflow.python.keras._impl.keras import losses @@ -35,6 +36,7 @@ from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueu from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar +from tensorflow.python.layers.base import _DeferredTensor from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import optimizer as tf_optimizer_module from tensorflow.python.util.tf_export import tf_export @@ -628,15 +630,29 @@ class Model(Network): """ loss = loss or {} if context.in_eager_mode() and not isinstance( - optimizer, tf_optimizer_module.Optimizer): + optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)): raise ValueError('Only TF native optimizers are supported in Eager mode.') self.optimizer = optimizers.get(optimizer) self.loss = loss + self.metrics = metrics self.loss_weights = loss_weights if context.in_eager_mode() and sample_weight_mode is not None: raise ValueError('sample_weight_mode is not supported in Eager mode.') self.sample_weight_mode = sample_weight_mode + if context.in_eager_mode() and weighted_metrics is not None: + raise ValueError('weighted_metrics is not supported in Eager mode.') + self.weighted_metrics = weighted_metrics + if context.in_eager_mode() and target_tensors is not None: + raise ValueError('target_tensors is not supported in Eager mode.') + self.target_tensors = target_tensors + + if not self.built: + # Model is not compilable because it does not know its number of inputs + # and outputs, nor their shapes and names. We will compile after the first + # time the model gets called on training data. + return + self._is_compiled = True # Prepare loss functions. if isinstance(loss, dict): @@ -719,8 +735,6 @@ class Model(Network): raise ValueError('target_tensors are not currently supported in Eager' 'mode.') self.total_loss = None - self.metrics = metrics - self.weighted_metrics = weighted_metrics self.metrics_tensors = [] self.metrics_names = ['loss'] for i in range(len(self.outputs)): @@ -732,16 +746,15 @@ class Model(Network): self._feed_sample_weight_modes.append(None) self.sample_weights = [] self.targets = [] - self._collected_trainable_weights = self.trainable_weights for i in range(len(self.outputs)): self._feed_output_names.append(self.output_names[i]) - + self._collected_trainable_weights = self.trainable_weights return # Prepare targets of model. self.targets = [] self._feed_targets = [] - if target_tensors is not None: + if target_tensors not in (None, []): if isinstance(target_tensors, list): if len(target_tensors) != len(self.outputs): raise ValueError( @@ -768,9 +781,9 @@ class Model(Network): if i in skip_target_indices: self.targets.append(None) else: - shape = self._internal_output_shapes[i] + shape = K.int_shape(self.outputs[i]) name = self.output_names[i] - if target_tensors is not None: + if target_tensors not in (None, []): target = target_tensors[i] else: target = None @@ -927,7 +940,7 @@ class Model(Network): if metric in ('accuracy', 'acc', 'crossentropy', 'ce'): # custom handling of accuracy/crossentropy # (because of class mode duality) - output_shape = self._internal_output_shapes[i] + output_shape = K.int_shape(self.outputs[i]) if (output_shape[-1] == 1 or self.loss_functions[i] == losses.binary_crossentropy): # case: binary accuracy/crossentropy @@ -1481,55 +1494,213 @@ class Model(Network): def _standardize_user_data(self, x, - y, + y=None, sample_weight=None, class_weight=None, - check_batch_axis=True, batch_size=None): - if not hasattr(self, 'optimizer'): - raise RuntimeError('You must compile a model before ' - 'training/testing. ' - 'Use `model.compile(optimizer, loss)`.') - - output_shapes = [] - for output_shape, loss_fn in zip(self._feed_output_shapes, - self._feed_loss_fns): - if loss_fn is losses.sparse_categorical_crossentropy: - output_shapes.append(output_shape[:-1] + (1,)) - elif (not hasattr(loss_fn, '__name__') or - getattr(losses, loss_fn.__name__, None) is None): - # If `loss_fn` is not a function (e.g. callable class) - # or if it not in the `losses` module, then - # it is a user-defined loss and we make no assumptions - # about it. - output_shapes.append(None) + """Runs validation checks on input and target data passed by the user. + + Also standardizes the data to lists of arrays, in order. + + Also builds and compiles the model on the fly if it is a subclassed model + that has never been called before (and thus has no inputs/outputs). + + This is a purely internal method, subject to refactoring at any time. + + Args: + x: An array or list of arrays, to be used as input data. If the model + has known, named inputs, this could also be a dict mapping input names + to the corresponding array. + y: An array or list of arrays, to be used as target data. If the model + has known, named outputs, this could also be a dict mapping output names + to the corresponding array. + sample_weight: An optional sample-weight array passed by the user to + weight the importance of each sample in `x`. + class_weight: An optional class-weight array by the user to + weight the importance of samples in `x` based on the class they belong + to, as conveyed by `y`. + batch_size: Integer batch size. If provided, it is used to run additional + validation checks on stateful models. + + Returns: + A tuple of 3 lists: input arrays, target arrays, sample-weight arrays. + If the model's input and targets are symbolic, these lists are empty + (since the model takes no user-provided data, instead the data comes + from the symbolic inputs/targets). + + Raises: + ValueError: In case of invalid user-provided data. + RuntimeError: If the model was never compiled. + """ + # First, we build/compile the model on the fly if necessary. + all_inputs = [] + if not self.built: + # We need to use `x` to set the model inputs. + # We type-check that `x` and `y` are either single arrays + # or lists of arrays. + if isinstance(x, (list, tuple)): + if not all(isinstance(v, np.ndarray) or + tensor_util.is_tensor(v) for v in x): + raise ValueError('Please provide as model inputs either a single ' + 'array or a list of arrays. You passed: x=' + str(x)) + all_inputs += list(x) + elif isinstance(x, dict): + raise ValueError('Please do not pass a dictionary as model inputs.') else: - output_shapes.append(output_shape) + if not isinstance(x, np.ndarray) and not tensor_util.is_tensor(x): + raise ValueError('Please provide as model inputs either a single ' + 'array or a list of arrays. You passed: x=' + str(x)) + all_inputs.append(x) + + # Build the model using the retrieved inputs (value or symbolic). + # If values, then in symbolic-mode placeholders will be created + # to match the value shapes. + if not self.inputs: + self._set_inputs(x) + + if y is not None: + if not self.optimizer: + raise RuntimeError('You must compile a model before ' + 'training/testing. ' + 'Use `model.compile(optimizer, loss)`.') + if not self._is_compiled: + # On-the-fly compilation of the model. + # We need to use `y` to set the model targets. + if isinstance(y, (list, tuple)): + if not all(isinstance(v, np.ndarray) or + tensor_util.is_tensor(v) for v in y): + raise ValueError('Please provide as model targets either a single ' + 'array or a list of arrays. ' + 'You passed: y=' + str(y)) + elif isinstance(y, dict): + raise ValueError('Please do not pass a dictionary as model targets.') + else: + if not isinstance(y, np.ndarray) and not tensor_util.is_tensor(y): + raise ValueError('Please provide as model targets either a single ' + 'array or a list of arrays. ' + 'You passed: y=' + str(y)) + + # Typecheck that all inputs are *either* value *or* symbolic. + # TODO(fchollet): this check could be removed in Eager mode? + if y is not None: + if isinstance(y, (list, tuple)): + all_inputs += list(y) + else: + all_inputs.append(y) + if any(tensor_util.is_tensor(v) for v in all_inputs): + if not all(tensor_util.is_tensor(v) for v in all_inputs): + raise ValueError('Do not pass inputs that mix Numpy arrays and ' + 'TensorFlow tensors. ' + 'You passed: x=' + str(x) + '; y=' + str(y)) + + if context.in_graph_mode(): + # Handle target tensors if any passed. + if not isinstance(y, (list, tuple)): + y = [y] + target_tensors = [v for v in y if tensor_util.is_tensor(v)] + else: + target_tensors = None + self.compile(optimizer=self.optimizer, + loss=self.loss, + metrics=self.metrics, + loss_weights=self.loss_weights, + target_tensors=target_tensors) + + # If `x` and `y` were all symbolic, then no model should not be fed any + # inputs and targets. + # Note: in this case, `any` and `all` are equivalent since we disallow + # mixed symbolic/value inputs. + if any(tensor_util.is_tensor(v) for v in all_inputs): + return [], [], [] + + # What follows is input validation and standardization to list format, + # in the case where all inputs are value arrays. + + if context.in_eager_mode(): + # In eager mode, do not do shape validation. + feed_input_names = self.input_names + feed_input_shapes = None + elif not self._is_graph_network: + # Case: symbolic-mode subclassed network. Do not do shape validation. + feed_input_names = self._feed_input_names + feed_input_shapes = None + else: + # Case: symbolic-mode graph network. + # In this case, we run extensive shape validation checks. + feed_input_names = self._feed_input_names + feed_input_shapes = self._feed_input_shapes + + # Standardize the inputs. x = _standardize_input_data( x, - self._feed_input_names, - self._feed_input_shapes, - check_batch_axis=False, + feed_input_names, + feed_input_shapes, + check_batch_axis=False, # Don't enforce the batch size. exception_prefix='input') - y = _standardize_input_data( - y, - self._feed_output_names, - output_shapes, - check_batch_axis=False, - exception_prefix='target') - sample_weights = _standardize_sample_weights(sample_weight, - self._feed_output_names) - class_weights = _standardize_class_weights(class_weight, - self._feed_output_names) - sample_weights = [ - _standardize_weights(ref, sw, cw, mode) - for (ref, sw, cw, mode) in zip(y, sample_weights, class_weights, - self._feed_sample_weight_modes) - ] - _check_array_lengths(x, y, sample_weights) - _check_loss_and_target_compatibility(y, self._feed_loss_fns, - self._feed_output_shapes) + + if y is not None: + if context.in_eager_mode(): + feed_output_names = self.output_names + feed_output_shapes = None + # Sample weighting not supported in this case. + # TODO(fchollet): consider supporting it. + feed_sample_weight_modes = [None for _ in self.outputs] + elif not self._is_graph_network: + feed_output_names = self._feed_output_names + feed_output_shapes = None + # Sample weighting not supported in this case. + # TODO(fchollet): consider supporting it. + feed_sample_weight_modes = [None for _ in self.outputs] + else: + feed_output_names = self._feed_output_names + feed_sample_weight_modes = self._feed_sample_weight_modes + feed_output_shapes = [] + for output_shape, loss_fn in zip(self._feed_output_shapes, + self._feed_loss_fns): + if loss_fn is losses.sparse_categorical_crossentropy: + feed_output_shapes.append(output_shape[:-1] + (1,)) + elif (not hasattr(loss_fn, '__name__') or + getattr(losses, loss_fn.__name__, None) is None): + # If `loss_fn` is not a function (e.g. callable class) + # or if it not in the `losses` module, then + # it is a user-defined loss and we make no assumptions + # about it. + feed_output_shapes.append(None) + else: + feed_output_shapes.append(output_shape) + + # Standardize the outputs. + y = _standardize_input_data( + y, + feed_output_names, + feed_output_shapes, + check_batch_axis=False, # Don't enforce the batch size. + exception_prefix='target') + + # Generate sample-wise weight values given the `sample_weight` and + # `class_weight` arguments. + sample_weights = _standardize_sample_weights(sample_weight, + feed_output_names) + class_weights = _standardize_class_weights(class_weight, + feed_output_names) + sample_weights = [ + _standardize_weights(ref, sw, cw, mode) + for (ref, sw, cw, mode) in zip(y, sample_weights, class_weights, + feed_sample_weight_modes) + ] + # Check that all arrays have the same length. + _check_array_lengths(x, y, sample_weights) + if self._is_graph_network and not context.in_eager_mode(): + # Additional checks to avoid users mistakenly using improper loss fns. + _check_loss_and_target_compatibility(y, self._feed_loss_fns, + feed_output_shapes) + else: + y = [] + sample_weights = [] + if self.stateful and batch_size: + # Check that for stateful networks, number of samples is a multiple + # of the static batch size. if x[0].shape[0] % batch_size != 0: raise ValueError('In a stateful network, ' 'you should only pass inputs with ' @@ -1552,6 +1723,140 @@ class Model(Network): deduped_out_labels.append(new_label) return deduped_out_labels + def _set_inputs(self, inputs): + """Set model's input and output specs based on the input data received. + + This is to be used for Model subclasses, which do not know at instantiation + time what their inputs look like. + + Args: + inputs: Single array, or list of arrays. The arrays could be placeholders, + Numpy arrays, or data tensors. + - if placeholders: the model is built on top of these placeholders, + and we expect Numpy data to be fed for them when calling `fit`/etc. + - if Numpy data: we create placeholders matching the shape of the Numpy + arrays. We expect Numpy data to be fed for these placeholders + when calling `fit`/etc. + - if data tensors: the model is built on top of these tensors. + We do not expect any Numpy data to be provided when calling `fit`/etc. + """ + if context.in_eager_mode(): + self._eager_set_inputs(inputs) + else: + self._symbolic_set_inputs(inputs) + + def _eager_set_inputs(self, inputs): + """Set model's input and output specs based on the input data received. + + This is to be used for Model subclasses, which do not know at instantiation + time what their inputs look like. + + We assume the number and ndim of outputs + does not change over different calls. + + Args: + inputs: Argument `x` (input data) passed by the user upon first model use. + + Raises: + ValueError: If the model's inputs are already set. + """ + assert context.in_eager_mode() + if self.inputs: + raise ValueError('Model inputs are already set.') + # On-the-fly setting of model inputs/outputs as DeferredTensors, + # to keep track of number of inputs and outputs and their ndim. + if isinstance(inputs, (list, tuple)): + dummy_output_values = self.call( + [ops.convert_to_tensor(v, dtype=K.floatx()) for v in inputs]) + dummy_input_values = list(inputs) + else: + dummy_output_values = self.call( + ops.convert_to_tensor(inputs, dtype=K.floatx())) + dummy_input_values = [inputs] + if isinstance(dummy_output_values, (list, tuple)): + dummy_output_values = list(dummy_output_values) + else: + dummy_output_values = [dummy_output_values] + self.outputs = [ + _DeferredTensor(shape=(None for _ in v.shape), + dtype=v.dtype) for v in dummy_output_values] + self.inputs = [ + _DeferredTensor(shape=(None for _ in v.shape), + dtype=v.dtype) for v in dummy_input_values] + self.input_names = [ + 'input_%d' % (i + 1) for i in range(len(dummy_input_values))] + self.output_names = [ + 'output_%d' % (i + 1) for i in range(len(dummy_output_values))] + self.built = True + + def _symbolic_set_inputs(self, inputs): + """Set model's inputs based on the input data received from the user. + + This is to be used for Model subclasses, which do not know at instantiation + time what their inputs look like. + + Args: + inputs: Argument `x` (input data) passed by the user upon first model use. + + Raises: + ValueError: If the model's inputs are already set. + """ + assert context.in_graph_mode() + if self.inputs: + raise ValueError('Model inputs are already set.') + + # On-the-fly setting of symbolic model inputs (either by using the tensor + # provided, or by creating a placeholder if Numpy data was provided). + self.inputs = [] + self.input_names = [] + self._feed_inputs = [] + self._feed_input_names = [] + self._feed_input_shapes = [] + if isinstance(inputs, (list, tuple)): + inputs = list(inputs) + else: + inputs = [inputs] + + for i, v in enumerate(inputs): + name = 'input_%d' % (i + 1) + self.input_names.append(name) + if isinstance(v, list): + v = np.asarray(v) + if v.ndim == 1: + v = np.expand_dims(v, 1) + if isinstance(v, (np.ndarray)): + # We fix the placeholder shape except the batch size. + # This is suboptimal, but it is the best we can do with the info + # we have. The user should call `model._set_inputs(placeholders)` + # to specify custom placeholders if the need arises. + shape = (None,) + v.shape[1:] + placeholder = K.placeholder(shape=shape, name=name) + self.inputs.append(placeholder) + self._feed_inputs.append(placeholder) + self._feed_input_names.append(name) + self._feed_input_shapes.append(shape) + else: + # Assumed tensor - TODO(fchollet) additional type check? + self.inputs.append(v) + if K.is_placeholder(v): + self._feed_inputs.append(v) + self._feed_input_names.append(name) + self._feed_input_shapes.append(K.int_shape(v)) + + # Obtain symbolic outputs by calling the model. + if len(self.inputs) == 1: + outputs = self.call(self.inputs[0]) + else: + outputs = self.call(self.inputs) + if isinstance(outputs, (list, tuple)): + outputs = list(outputs) + else: + outputs = [outputs] + self.outputs = outputs + self.output_names = [ + 'output_%d' % (i + 1) for i in range(len(self.outputs))] + self.built = True + def fit(self, x=None, y=None, @@ -1661,6 +1966,9 @@ class Model(Network): ValueError: In case of mismatch between the provided input data and what the model expects. """ + # TODO(fchollet): this method may be creating reference cycles, which would + # lead to accumulating garbage in memory when called in a loop. Investigate. + # Backwards compatibility if batch_size is None and steps_per_epoch is None: batch_size = 32 @@ -1676,13 +1984,13 @@ class Model(Network): raise ValueError('If fitting from data tensors, ' 'you should specify the `steps_per_epoch` ' 'argument.') + # Validate user data. x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight, class_weight=class_weight, - check_batch_axis=False, batch_size=batch_size) # Prepare validation data. do_validation = False @@ -1705,7 +2013,6 @@ class Model(Network): val_x, val_y, sample_weight=val_sample_weight, - check_batch_axis=False, batch_size=batch_size) if self.uses_learning_phase and not isinstance(K.learning_phase(), int): val_ins = val_x + val_y + val_sample_weights + [0.] @@ -1859,12 +2166,12 @@ class Model(Network): raise ValueError('If evaluating from data tensors, ' 'you should specify the `steps` ' 'argument.') + # Validate user data. x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight, - check_batch_axis=False, batch_size=batch_size) # Prepare inputs, delegate logic to `_test_loop`. if self.uses_learning_phase and not isinstance(K.learning_phase(), int): @@ -1911,20 +2218,7 @@ class Model(Network): raise ValueError('If predicting from data tensors, ' 'you should specify the `steps` ' 'argument.') - # Validate user data. - x = _standardize_input_data( - x, - self._feed_input_names, - self._feed_input_shapes, - check_batch_axis=False) - if self.stateful: - if x[0].shape[0] > batch_size and x[0].shape[0] % batch_size != 0: - raise ValueError('In a stateful network, ' - 'you should only pass inputs with ' - 'a number of samples that can be ' - 'divided by the batch size. Found: ' + - str(x[0].shape[0]) + ' samples. ' - 'Batch size: ' + str(batch_size) + '.') + x, _, _ = self._standardize_user_data(x) # Prepare inputs, delegate logic to `_predict_loop`. if self.uses_learning_phase and not isinstance(K.learning_phase(), int): @@ -1982,22 +2276,21 @@ class Model(Network): x, y, sample_weight=sample_weight, - class_weight=class_weight, - check_batch_axis=True) + class_weight=class_weight) if self.uses_learning_phase and not isinstance(K.learning_phase(), int): ins = x + y + sample_weights + [1.] else: ins = x + y + sample_weights if context.in_eager_mode(): - return training_eager.train_on_batch(self, ins) - - if context.in_graph_mode(): + outputs = training_eager.train_on_batch(self, ins) + else: self._make_train_function() outputs = self.train_function(ins) - if len(outputs) == 1: - return outputs[0] - return outputs + + if len(outputs) == 1: + return outputs[0] + return outputs def test_on_batch(self, x, y, sample_weight=None): """Test the model on a single batch of samples. @@ -2031,21 +2324,21 @@ class Model(Network): ValueError: in case of invalid arguments. """ x, y, sample_weights = self._standardize_user_data( - x, y, sample_weight=sample_weight, check_batch_axis=True) + x, y, sample_weight=sample_weight) if self.uses_learning_phase and not isinstance(K.learning_phase(), int): ins = x + y + sample_weights + [0.] else: ins = x + y + sample_weights if context.in_eager_mode(): - return training_eager.test_on_batch(self, ins) - - if context.in_graph_mode(): + outputs = training_eager.test_on_batch(self, ins) + else: self._make_test_function() outputs = self.test_function(ins) - if len(outputs) == 1: - return outputs[0] - return outputs + + if len(outputs) == 1: + return outputs[0] + return outputs def predict_on_batch(self, x): """Returns predictions for a single batch of samples. @@ -2057,8 +2350,8 @@ class Model(Network): Numpy array(s) of predictions. """ - x = _standardize_input_data(x, self._feed_input_names, - self._feed_input_shapes) + x, _, _ = self._standardize_user_data(x) + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): ins = x + [0.] else: @@ -2190,6 +2483,10 @@ class Model(Network): ValueError: In case the generator yields data in an invalid format. """ + if not self._is_graph_network: + raise NotImplementedError( + '`fit_generator` is not yet enabled for Model subclasses') + wait_time = 0.01 # in seconds epoch = initial_epoch @@ -2445,6 +2742,10 @@ class Model(Network): ValueError: In case the generator yields data in an invalid format. """ + if not self._is_graph_network: + raise NotImplementedError( + '`evaluate_generator` is not yet enabled for Model subclasses') + self._make_test_function() steps_done = 0 @@ -2569,6 +2870,10 @@ class Model(Network): ValueError: In case the generator yields data in an invalid format. """ + if not self._is_graph_network: + raise NotImplementedError( + '`predict_generator` is not yet enabled for Model subclasses') + self._make_predict_function() steps_done = 0 diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py index 0a115969ca..3774596af0 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py @@ -142,7 +142,7 @@ def _eager_metrics_fn(model, outputs, targets): output_metrics = model.nested_metrics[i] for nested_output_metric in output_metrics: metric_name, metric_fn = _get_metrics_info( - nested_output_metric, model._internal_output_shapes[i], + nested_output_metric, K.int_shape(model.outputs[i]), model.loss_functions[i]) if len(model.output_names) > 1: @@ -173,7 +173,10 @@ def _model_loss(model, inputs, targets): applies masking and sample weighting to the loss value. """ total_loss = 0 - outs = model(inputs) + if len(inputs) == 1: + outs = model.call(inputs[0]) + else: + outs = model.call(inputs) if not isinstance(outs, list): outs = [outs] @@ -646,7 +649,10 @@ def predict_loop(model, ins, batch_size=32, verbose=0, steps=None): for i in range(len(model.inputs)): eager_model_inputs.append(ins_batch_converted[i]) - batch_outs = model(eager_model_inputs) + if len(eager_model_inputs) == 1: + batch_outs = model.call(eager_model_inputs[0]) + else: + batch_outs = model.call(eager_model_inputs) if not isinstance(batch_outs, list): batch_outs = [batch_outs] diff --git a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py new file mode 100644 index 0000000000..275985aa36 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py @@ -0,0 +1,558 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for Model subclassing.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tempfile + +import numpy as np + +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import test_util +from tensorflow.python.keras._impl import keras +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test +from tensorflow.python.training.rmsprop import RMSPropOptimizer + +try: + import h5py # pylint:disable=g-import-not-at-top +except ImportError: + h5py = None + + +class SimpleTestModel(keras.Model): + + def __init__(self, use_bn=False, use_dp=False, num_classes=10): + super(SimpleTestModel, self).__init__(name='test_model') + self.use_bn = use_bn + self.use_dp = use_dp + self.num_classes = num_classes + + self.dense1 = keras.layers.Dense(32, activation='relu') + self.dense2 = keras.layers.Dense(num_classes, activation='softmax') + if self.use_dp: + self.dp = keras.layers.Dropout(0.5) + if self.use_bn: + self.bn = keras.layers.BatchNormalization(axis=-1) + + def call(self, inputs): + x = self.dense1(inputs) + if self.use_dp: + x = self.dp(x) + if self.use_bn: + x = self.bn(x) + return self.dense2(x) + + +class MultiIOTestModel(keras.Model): + + def __init__(self, use_bn=False, use_dp=False, num_classes=(2, 3)): + super(MultiIOTestModel, self).__init__(name='test_model') + self.use_bn = use_bn + self.use_dp = use_dp + self.num_classes = num_classes + + self.dense1 = keras.layers.Dense(32, activation='relu') + self.dense2 = keras.layers.Dense(num_classes[0], activation='softmax') + self.dense3 = keras.layers.Dense(num_classes[1], activation='softmax') + if use_dp: + self.dp = keras.layers.Dropout(0.5) + if use_bn: + self.bn = keras.layers.BatchNormalization() + + def call(self, inputs): + x1, x2 = inputs + x1 = self.dense1(x1) + x2 = self.dense1(x2) + if self.use_dp: + x1 = self.dp(x1) + if self.use_bn: + x2 = self.bn(x2) + return [self.dense2(x1), self.dense3(x2)] + + +class NestedTestModel1(keras.Model): + """A model subclass nested inside a model subclass. + """ + + def __init__(self, num_classes=2): + super(NestedTestModel1, self).__init__(name='nested_model_1') + self.num_classes = num_classes + self.dense1 = keras.layers.Dense(32, activation='relu') + self.dense2 = keras.layers.Dense(num_classes, activation='relu') + self.bn = keras.layers.BatchNormalization() + self.test_net = SimpleTestModel(num_classes=4, + use_bn=True, + use_dp=True) + + def call(self, inputs): + x = self.dense1(inputs) + x = self.bn(x) + x = self.test_net(x) # pylint: disable=not-callable + return self.dense2(x) + + +def get_functional_graph_model(input_dim, num_classes): + # A simple functional-API model (a.k.a. graph network) + inputs = keras.Input(shape=(input_dim,)) + x = keras.layers.Dense(32, activation='relu')(inputs) + x = keras.layers.BatchNormalization()(x) + outputs = keras.layers.Dense(num_classes)(x) + return keras.Model(inputs, outputs) + + +class NestedTestModel2(keras.Model): + """A model subclass with a functional-API graph network inside. + """ + + def __init__(self, num_classes=2): + super(NestedTestModel2, self).__init__(name='nested_model_2') + self.num_classes = num_classes + self.dense1 = keras.layers.Dense(32, activation='relu') + self.dense2 = keras.layers.Dense(num_classes, activation='relu') + self.bn = self.bn = keras.layers.BatchNormalization() + self.test_net = get_functional_graph_model(32, 4) + + def call(self, inputs): + x = self.dense1(inputs) + x = self.bn(x) + x = self.test_net(x) + return self.dense2(x) + + +def get_nested_model_3(input_dim, num_classes): + # A functional-API model with a subclassed model inside. + # NOTE: this requires the inner subclass to implement `compute_output_shape`. + + inputs = keras.Input(shape=(input_dim,)) + x = keras.layers.Dense(32, activation='relu')(inputs) + x = keras.layers.BatchNormalization()(x) + + class Inner(keras.Model): + + def __init__(self): + super(Inner, self).__init__() + self.dense1 = keras.layers.Dense(32, activation='relu') + self.dense2 = keras.layers.Dense(5, activation='relu') + self.bn = keras.layers.BatchNormalization() + + def call(self, inputs): + x = self.dense1(inputs) + x = self.dense2(x) + return self.bn(x) + + def compute_output_shape(self, input_shape): + return tensor_shape.TensorShape((input_shape[0], 5)) + + test_model = Inner() + x = test_model(x) # pylint: disable=not-callable + outputs = keras.layers.Dense(num_classes)(x) + return keras.Model(inputs, outputs, name='nested_model_3') + + +class ModelSubclassingTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def test_single_io_workflow_with_np_arrays(self): + num_classes = 2 + num_samples = 100 + input_dim = 50 + + with self.test_session(): + model = SimpleTestModel(num_classes=num_classes, + use_dp=True, + use_bn=True) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) + + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) + + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) + + @test_util.run_in_graph_and_eager_modes() + def test_multi_io_workflow_with_np_arrays(self): + num_classes = (2, 3) + num_samples = 1000 + input_dim = 50 + + with self.test_session(): + model = MultiIOTestModel(num_classes=num_classes, + use_dp=True, + use_bn=True) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + _ = model.evaluate([x1, x2], [y1, y2], verbose=0) + + def test_single_io_workflow_with_tensors(self): + + num_classes = 2 + num_samples = 10 + input_dim = 50 + + with self.test_session(): + model = SimpleTestModel(num_classes=num_classes, + use_dp=True, + use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + + x = array_ops.ones((num_samples, input_dim)) + y = array_ops.zeros((num_samples, num_classes)) + + model.fit(x, y, epochs=2, steps_per_epoch=10, verbose=0) + _ = model.evaluate(steps=10, verbose=0) + + def test_multi_io_workflow_with_tensors(self): + + num_classes = (2, 3) + num_samples = 10 + input_dim = 50 + + with self.test_session(): + model = MultiIOTestModel(num_classes=num_classes, + use_dp=True, + use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + + x1 = array_ops.ones((num_samples, input_dim)) + x2 = array_ops.ones((num_samples, input_dim)) + y1 = array_ops.zeros((num_samples, num_classes[0])) + y2 = array_ops.zeros((num_samples, num_classes[1])) + + model.fit([x1, x2], [y1, y2], epochs=2, steps_per_epoch=10, verbose=0) + _ = model.evaluate(steps=10, verbose=0) + + def test_multi_io_workflow_with_numpy_arrays_and_custom_placeholders(self): + + num_classes = (2, 3) + num_samples = 1000 + input_dim = 50 + + with self.test_session(): + model = MultiIOTestModel(num_classes=num_classes, + use_dp=True, + use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + x2_placeholder = array_ops.placeholder( + dtype='float32', shape=(None, input_dim)) + model._set_inputs([x1, x2_placeholder]) + + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + _ = model.evaluate([x1, x2], [y1, y2], verbose=0) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def test_attributes(self): + # layers, weights, trainable_weights, non_trainable_weights, inputs, outputs + + num_classes = (2, 3) + num_samples = 100 + input_dim = 50 + + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + self.assertEqual(model.name, 'test_model') + self.assertEqual(model.built, False) + self.assertEqual(len(model.weights), 0) + + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.train_on_batch([x1, x2], [y1, y2]) + + self.assertEqual(model.built, True) + self.assertEqual(len(model.layers), 4) + self.assertEqual(len(model.weights), 10) + self.assertEqual(len(model.trainable_weights), 8) + self.assertEqual(len(model.non_trainable_weights), 2) + self.assertEqual(len(model.inputs), 2) + self.assertEqual(len(model.outputs), 2) + + @test_util.run_in_graph_and_eager_modes() + def test_updates(self): + # test that updates get run during training + num_samples = 100 + input_dim = 50 + + class BNNet(keras.Model): + + def __init__(self): + super(BNNet, self).__init__() + self.bn = keras.layers.BatchNormalization(beta_initializer='ones', + gamma_initializer='ones') + + def call(self, inputs): + return self.bn(inputs) + + x = np.ones((num_samples, input_dim)) + y = np.ones((num_samples, input_dim)) + + with self.test_session(): + model = BNNet() + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + y_ref = model.predict(x) + + model.train_on_batch(x, y) + y_new = model.predict(x) + self.assertGreater(np.sum(np.abs(y_ref - y_new)), 0.1) + + @test_util.run_in_graph_and_eager_modes() + def test_training_and_inference_behavior(self): + # test that dropout is applied in training and not inference + + num_samples = 100 + input_dim = 50 + + class DPNet(keras.Model): + + def __init__(self): + super(DPNet, self).__init__() + self.dp = keras.layers.Dropout(0.5) + self.dense = keras.layers.Dense(1, + use_bias=False, + kernel_initializer='ones') + + def call(self, inputs): + x = self.dp(inputs) + return self.dense(x) + + with self.test_session(): + model = DPNet() + x = np.ones((num_samples, input_dim)) + y = model.predict(x) + self.assertEqual(np.sum(y), np.sum(x)) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + loss = model.train_on_batch(x, y) + self.assertGreater(loss, 0.1) + + @test_util.run_in_graph_and_eager_modes() + def test_training_methods(self): + # test fit, train_on_batch + # on different input types: list, dict + + num_classes = (2, 3) + num_samples = 100 + input_dim = 50 + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + with self.test_session(): + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32) + model.fit({'input_1': x1, 'input_2': x2}, + {'output_1': y1, 'output_2': y2}, + epochs=2, batch_size=32) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, + validation_data=([x1, x2], [y1, y2])) + + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.train_on_batch([x1, x2], [y1, y2]) + model.train_on_batch({'input_1': x1, 'input_2': x2}, + {'output_1': y1, 'output_2': y2}) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def test_inference_methods(self): + # test predict, evaluate, test_on_batch, predict_on_batch + # on different input types: list, dict + num_classes = (2, 3) + num_samples = 100 + input_dim = 50 + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + with self.test_session(): + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.evaluate([x1, x2], [y1, y2]) + model.test_on_batch([x1, x2], [y1, y2]) + + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.predict([x1, x2]) + + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.predict_on_batch([x1, x2]) + + @test_util.run_in_graph_and_eager_modes() + def test_trainable_mutation(self): + # test that you can change `trainable` on a model or layer, and that + # it freezes the model state during training + # TODO(fchollet): add test after we unify BN behavior in eager and symbolic. + pass + + @test_util.run_in_graph_and_eager_modes() + def test_saving(self): + if h5py is None: + return # Skip test if models cannot be saved. + + num_classes = (2, 3) + num_samples = 100 + input_dim = 50 + + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) + + with self.test_session(): + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32) + y_ref_1, y_ref_2 = model.predict([x1, x2]) + + fd, fname = tempfile.mkstemp('.h5') + model.save_weights(fname) + + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + # need to build the model before loading weights + # (otherwise no weights to load) + model._set_inputs([x1, x2]) + model.load_weights(fname) + + y1, y2 = model.predict([x1, x2]) + self.assertAllClose(y_ref_1, y1, atol=1e-5) + self.assertAllClose(y_ref_2, y2, atol=1e-5) + os.close(fd) + os.remove(fname) + + @test_util.run_in_graph_and_eager_modes() + def test_summary(self): + + class ToString(object): + + def __init__(self): + self.contents = '' + + def __call__(self, msg): + self.contents += msg + '\n' + + # Single-io + model = SimpleTestModel(num_classes=4, use_bn=True, use_dp=True) + model._set_inputs(np.ones((3, 4))) # need to build model first + print_fn = ToString() + model.summary(print_fn=print_fn) + self.assertTrue('Trainable params: 356' in print_fn.contents) + + # Multi-io + model = MultiIOTestModel(num_classes=(5, 6), use_bn=True, use_dp=True) + model._set_inputs([np.ones((3, 4)), + np.ones((3, 4))]) # need to build model first + print_fn = ToString() + model.summary(print_fn=print_fn) + self.assertTrue('Trainable params: 587' in print_fn.contents) + + @test_util.run_in_graph_and_eager_modes() + def test_subclass_nested_in_subclass(self): + num_classes = 2 + num_samples = 100 + input_dim = 50 + + with self.test_session(): + model = NestedTestModel1(num_classes=num_classes) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) + + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) + + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) + + self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) + self.assertEqual(len(model.non_trainable_weights), + 2 + len(model.test_net.non_trainable_weights)) + self.assertEqual(len(model.trainable_weights), + 6 + len(model.test_net.trainable_weights)) + + @test_util.run_in_graph_and_eager_modes() + def test_graph_nested_in_subclass(self): + num_classes = 2 + num_samples = 100 + input_dim = 50 + + with self.test_session(): + model = NestedTestModel2(num_classes=num_classes) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) + + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) + + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) + + self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) + self.assertEqual(len(model.non_trainable_weights), + 2 + len(model.test_net.non_trainable_weights)) + self.assertEqual(len(model.trainable_weights), + 6 + len(model.test_net.trainable_weights)) + + @test_util.run_in_graph_and_eager_modes() + def test_subclass_nested_in_graph(self): + num_classes = 2 + num_samples = 100 + input_dim = 50 + + with self.test_session(): + model = get_nested_model_3(input_dim=input_dim, num_classes=num_classes) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) + + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) + + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) + + self.assertEqual(len(model.weights), 16) + self.assertEqual( + len(model.non_trainable_weights), 4) + self.assertEqual(len(model.trainable_weights), 12) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/keras/_impl/keras/models.py b/tensorflow/python/keras/_impl/keras/models.py index f5d44ef669..4c3ec7dbe4 100644 --- a/tensorflow/python/keras/_impl/keras/models.py +++ b/tensorflow/python/keras/_impl/keras/models.py @@ -406,7 +406,9 @@ class Sequential(Model): """ def __init__(self, layers=None, name=None): - self.layers = [] # Stack of layers. + self._is_graph_network = True + self._is_compiled = False + self._layers = [] # Stack of layers. self.model = None # Internal Model instance. self.inputs = [] # List of input tensors self.outputs = [] # List of length 1: the output tensor (unique). @@ -527,7 +529,7 @@ class Sequential(Model): self._inbound_nodes[0].output_tensors = self.outputs self._inbound_nodes[0].output_shapes = [K.int_shape(self.outputs[0])] - self.layers.append(layer) + self._layers.append(layer) self.built = False def pop(self): diff --git a/tensorflow/python/keras/_impl/keras/utils/layer_utils.py b/tensorflow/python/keras/_impl/keras/utils/layer_utils.py index a9c8fa68c9..4c8009dfd8 100644 --- a/tensorflow/python/keras/_impl/keras/utils/layer_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/layer_utils.py @@ -59,6 +59,10 @@ def print_summary(model, line_length=None, positions=None, print_fn=None): if model.__class__.__name__ == 'Sequential': sequential_like = True + elif not model._is_graph_network: + # We treat subclassed models as a simple sequence of layers, for logging + # purposes. + sequential_like = True else: sequential_like = True nodes_by_depth = model._nodes_by_depth.values() @@ -118,17 +122,24 @@ def print_summary(model, line_length=None, positions=None, print_fn=None): print_fn('=' * line_length) def print_layer_summary(layer): + """Prints a summary for a single layer. + + Arguments: + layer: target layer. + """ try: output_shape = layer.output_shape except AttributeError: output_shape = 'multiple' + except RuntimeError: # output_shape unknown in Eager mode. + output_shape = '?' name = layer.name cls_name = layer.__class__.__name__ fields = [name + ' (' + cls_name + ')', output_shape, layer.count_params()] print_row(fields, positions) def print_layer_summary_with_connections(layer): - """Prints a summary for a single layer. + """Prints a summary for a single layer (including topological connections). Arguments: layer: target layer. diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 0d78ef25f2..38b75e6d3c 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -730,12 +730,10 @@ class Layer(object): activity_regularization = self._activity_regularizer(output) self.add_loss(activity_regularization, inputs=inputs) - if not in_deferred_mode: - # TODO(fchollet): consider how masking will work with deferred mode. - # Handle mask computation and propagation to the next layer. + # TODO(fchollet): consider enabling masking for Eager mode. if hasattr(self, 'compute_mask'): output_mask = self.compute_mask(inputs, previous_mask) - if isinstance(outputs, list): + if isinstance(outputs, (list, tuple)): if output_mask is None: output_mask = [None for _ in range(len(outputs))] for x, m in zip(outputs, output_mask): diff --git a/tensorflow/python/layers/network.py b/tensorflow/python/layers/network.py index 499f53d21b..eeb3276f0c 100644 --- a/tensorflow/python/layers/network.py +++ b/tensorflow/python/layers/network.py @@ -223,13 +223,36 @@ class GraphNetwork(base.Layer): - get_layer: retrieves a child layer by name or index in the graph. Raises: - RuntimeError: If created in Eager mode. + TypeError: If created when eager execution is enabled, with inputs that + don't come from a call to `Input` or outputs that don't come from layers. """ def __init__(self, inputs, outputs, name=None): # pylint: disable=super-init-not-called + if isinstance(inputs, (list, tuple)): + self.inputs = list(inputs) # Tensor or list of tensors. + else: + self.inputs = [inputs] + if isinstance(outputs, (list, tuple)): + self.outputs = list(outputs) + else: + self.outputs = [outputs] + if context.in_eager_mode(): - # TODO(fchollet): check that all inputs and outputs are DeferredTensors. - pass + # Check that all inputs/outputs are DeferredTensors. + for tensor in self.inputs: + if not isinstance(tensor, base._DeferredTensor): # pylint: disable=protected-access + raise TypeError('When eager execution is enabled, ' + 'inputs must come from a call to ' + '`tf.keras.Input` (called after ' + 'tfe.enable_eager_execution()). ' + 'Received invalid input: ' + str(tensor)) + for tensor in self.outputs: + if not isinstance(tensor, base._DeferredTensor): # pylint: disable=protected-access + raise TypeError('When eager execution is enabled, ' + 'outputs must come from a call to ' + 'a layer (called after ' + 'tfe.enable_eager_execution()). ' + 'Received invalid output: ' + str(tensor)) self._init_set_name(name) self._activity_regularizer = None @@ -250,6 +273,7 @@ class GraphNetwork(base.Layer): self.built = True # A GraphNetwork does not create weights of its own, thus has no dtype. self._dtype = None + self._is_graph_network = True # The following are implemented as property functions: # self.trainable_weights # self.non_trainable_weights @@ -262,18 +286,9 @@ class GraphNetwork(base.Layer): self._reuse = None self._graph = ops.get_default_graph() - # GraphNetwork-specific properties. - if isinstance(inputs, (list, tuple)): - self.inputs = list(inputs) # Tensor or list of tensors. - else: - self.inputs = [inputs] - if isinstance(outputs, (list, tuple)): - self.outputs = list(outputs) - else: - self.outputs = [outputs] # All layers in order of horizontal graph traversal. # Entries are unique. Includes input and output layers. - self.layers = [] + self._layers = [] # Check for redundancy in inputs. if len(set(self.inputs)) != len(self.inputs): @@ -483,7 +498,7 @@ class GraphNetwork(base.Layer): # here we order them by traversal order. layers_for_depth.sort(key=lambda x: layer_indices[x]) layers.extend(layers_for_depth) - self.layers = layers + self._layers = layers self._layers_by_depth = layers_by_depth # Get sorted list of node depths. @@ -542,6 +557,10 @@ class GraphNetwork(base.Layer): input_tensors=self.inputs, output_tensors=self.outputs) + @property + def layers(self): + return self._layers + def get_layer(self, name=None, index=None): """Retrieves a layer based on either its name (unique) or index. @@ -627,6 +646,9 @@ class GraphNetwork(base.Layer): Returns: A list of update ops. """ + if context.in_eager_mode(): + return [] + if not self.trainable and not self.stateful: return [] @@ -636,8 +658,8 @@ class GraphNetwork(base.Layer): # `updates` might contain irrelevant updates, so it needs to be filtered # with respect to inputs the model has been called on. - relevant_inputs = [] - for i in range(len(self._inbound_nodes)): + relevant_inputs = self.inputs or [] + for i in range(1, len(self._inbound_nodes)): inputs = self.get_input_at(i) if isinstance(inputs, list): relevant_inputs += inputs @@ -665,16 +687,13 @@ class GraphNetwork(base.Layer): A list of loss tensors. """ losses = [] - if context.in_eager_mode(): - for layer in self.layers: - losses += layer.losses - return losses - for layer in self.layers: losses += layer.losses + if context.in_eager_mode(): + return losses - relevant_inputs = [] - for i in range(len(self._inbound_nodes)): + relevant_inputs = self.inputs or [] + for i in range(1, len(self._inbound_nodes)): inputs = self.get_input_at(i) if isinstance(inputs, list): relevant_inputs += inputs @@ -716,6 +735,10 @@ class GraphNetwork(base.Layer): A list of `InputSpec` instances (one per input to the model) or a single instance if the model has only one input. """ + # If not a graph network, can't assume anything. + if not self._is_graph_network: + return None + specs = [] for layer in self._input_layers: if layer.input_spec is None: @@ -766,6 +789,9 @@ class GraphNetwork(base.Layer): return outputs def compute_output_shape(self, input_shape): + if not self._is_graph_network: + raise NotImplementedError + if isinstance(input_shape, list): input_shapes = [] for shape in input_shape: diff --git a/tensorflow/python/layers/network_test.py b/tensorflow/python/layers/network_test.py index f46ebdf2af..cc6e8ca9f4 100644 --- a/tensorflow/python/layers/network_test.py +++ b/tensorflow/python/layers/network_test.py @@ -530,7 +530,6 @@ class NetworkTest(test.TestCase): self.assertEqual(len(network.layers), 2) self.assertEqual(network.layers[0].sparse, True) - @test_util.run_in_graph_and_eager_modes() def testMaskingSingleInput(self): class MaskedLayer(base_layers.Layer): diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt index 2bf584fa29..76cf84084f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt @@ -38,6 +38,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "layers" + mtype: "" + } member { name: "losses" mtype: "" @@ -108,11 +112,11 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'inputs\', \'outputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_loss" - argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_update" @@ -120,7 +124,7 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " } member_method { name: "add_weight" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt index 0a60968131..fb6c8d70dd 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt @@ -39,6 +39,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "layers" + mtype: "" + } member { name: "losses" mtype: "" @@ -125,7 +129,7 @@ tf_class { } member_method { name: "add_loss" - argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_update" @@ -133,7 +137,7 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " } member_method { name: "add_weight" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt index 0b816b5863..d8d4eb5ca7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt @@ -38,6 +38,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "layers" + mtype: "" + } member { name: "losses" mtype: "" @@ -108,11 +112,11 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'inputs\', \'outputs\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_loss" - argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_update" @@ -120,7 +124,7 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " } member_method { name: "add_weight" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt index 7c1bfcb225..2e044d78bb 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt @@ -39,6 +39,10 @@ tf_class { name: "input_spec" mtype: "" } + member { + name: "layers" + mtype: "" + } member { name: "losses" mtype: "" @@ -125,7 +129,7 @@ tf_class { } member_method { name: "add_loss" - argspec: "args=[\'self\', \'losses\', \'inputs\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'self\'], varargs=args, keywords=kwargs, defaults=None" } member_method { name: "add_update" @@ -133,7 +137,7 @@ tf_class { } member_method { name: "add_variable" - argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\', \'partitioner\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'True\', \'None\'], " } member_method { name: "add_weight" -- GitLab From 8ed18cfec21df21b2b8d5f2ee37418c236f26931 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Mon, 12 Feb 2018 17:58:27 -0800 Subject: [PATCH 104/629] Fix TFLite examples/image_label PiperOrigin-RevId: 185465716 --- .../lite/examples/label_image/bitmap_helpers_impl.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h index d57f597875..a908e0c5ee 100644 --- a/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h +++ b/tensorflow/contrib/lite/examples/label_image/bitmap_helpers_impl.h @@ -58,8 +58,11 @@ void resize(T* out, uint8_t* in, int image_height, int image_width, ops::builtin::BuiltinOpResolver resolver; TfLiteRegistration* resize_op = resolver.FindOp(BuiltinOperator_RESIZE_BILINEAR); - interpreter->AddNodeWithParameters({0, 1}, {2}, nullptr, 0, nullptr, - resize_op, nullptr); + auto* params = reinterpret_cast( + malloc(sizeof(TfLiteResizeBilinearParams))); + params->align_corners = false; + interpreter->AddNodeWithParameters({0, 1}, {2}, nullptr, 0, params, resize_op, + nullptr); interpreter->AllocateTensors(); -- GitLab From 30ecfa3abe3c4146c29e1b6ee9a3279efa0833d7 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 12 Feb 2018 18:34:36 -0800 Subject: [PATCH 105/629] [TF:XLA] Implement ScatterNd. Add a helper method for performing scatter operations. Share it between ScatterNd and UnsortedSegmentSum implementations. In passing, add support for negative indices to the UnsortedSegmentSum implementation. Added helper methods for creating XLA while loops. Use the new helper in both Gather and Scatter ops. PiperOrigin-RevId: 185469229 --- tensorflow/compiler/tests/BUILD | 13 ++ .../compiler/tests/scatter_nd_op_test.py | 188 +++++++++++++++++ .../tests/segment_reduction_ops_test.py | 8 + tensorflow/compiler/tf2xla/kernels/BUILD | 4 + .../compiler/tf2xla/kernels/gather_op.cc | 91 +++------ .../compiler/tf2xla/kernels/scatter_nd_op.cc | 121 +++++++++++ .../tf2xla/kernels/scatter_op_helpers.h | 39 ---- .../tf2xla/kernels/segment_reduction_ops.cc | 141 ++----------- tensorflow/compiler/tf2xla/lib/BUILD | 33 +++ tensorflow/compiler/tf2xla/lib/scatter.cc | 190 ++++++++++++++++++ tensorflow/compiler/tf2xla/lib/scatter.h | 53 +++++ tensorflow/compiler/tf2xla/lib/util.cc | 55 +++++ tensorflow/compiler/tf2xla/lib/util.h | 5 + tensorflow/compiler/tf2xla/lib/while_loop.cc | 129 ++++++++++++ tensorflow/compiler/tf2xla/lib/while_loop.h | 74 +++++++ tensorflow/compiler/tf2xla/xla_helpers.cc | 51 +---- 16 files changed, 927 insertions(+), 268 deletions(-) create mode 100644 tensorflow/compiler/tests/scatter_nd_op_test.py create mode 100644 tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc delete mode 100644 tensorflow/compiler/tf2xla/kernels/scatter_op_helpers.h create mode 100644 tensorflow/compiler/tf2xla/lib/scatter.cc create mode 100644 tensorflow/compiler/tf2xla/lib/scatter.h create mode 100644 tensorflow/compiler/tf2xla/lib/while_loop.cc create mode 100644 tensorflow/compiler/tf2xla/lib/while_loop.h diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index db65ee13d1..acd9cf7bee 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -678,6 +678,19 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "scatter_nd_op_test", + size = "medium", + srcs = ["scatter_nd_op_test.py"], + tags = ["optonly"], + deps = [ + ":xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "xla_device_test", size = "small", diff --git a/tensorflow/compiler/tests/scatter_nd_op_test.py b/tensorflow/compiler/tests/scatter_nd_op_test.py new file mode 100644 index 0000000000..638946e234 --- /dev/null +++ b/tensorflow/compiler/tests/scatter_nd_op_test.py @@ -0,0 +1,188 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for tensorflow.ops.tf.scatter_nd.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools + +import numpy as np + +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.python.framework import errors +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test + + +def _AsType(v, vtype): + return v.astype(vtype) if isinstance(v, np.ndarray) else vtype(v) + + +def _FlatInnerDims(tensor, ndims=2): + shape = list(tensor.shape) + return tensor.reshape( + [functools.reduce(lambda x, y: x * y, shape[:-ndims + 1], 1)] + + shape[-ndims + 1:]) + + +def _FlatOuterDims(tensor, ndims=2): + shape = list(tensor.shape) + return tensor.reshape( + shape[:ndims - 1] + + [functools.reduce(lambda x, y: x * y, shape[ndims - 1:], 1)]) + + +def _NumpyScatterNd(ref, indices, updates, op): + ixdim = indices.shape[-1] + num_updates = indices.size // ixdim + total_nd = len(ref.shape) + slice_size = 1 + for i in range(ixdim, total_nd): + slice_size *= ref.shape[i] + flat_indices = _FlatInnerDims(indices) + flat_updates = updates.reshape((num_updates, slice_size)) + output_flat = _FlatOuterDims(ref, ixdim + 1) + for ix_updates, ix_output in enumerate(flat_indices): + ix_output = tuple(ix_output) + output_flat[ix_output] = op(output_flat[ix_output], + flat_updates[ix_updates]) + return output_flat.reshape(ref.shape) + + +def _NumpyUpdate(indices, updates, shape): + ref = np.zeros(shape, dtype=updates.dtype) + return _NumpyScatterNd(ref, indices, updates, lambda p, u: u) + + +class ScatterNdTest(XLATestCase): + + def _VariableRankTest(self, + np_scatter, + tf_scatter, + vtype, + itype, + repeat_indices=False): + np.random.seed(8) + ref_shapes = [(3, 6), (3, 6), (3, 6, 9), (3, 6, 9), (3, 6, 9), (3, 6, 9)] + indices_shapes = [(2,), (2, 2), (2,), (2, 2), (2, 3), (2, 3, 3)] + for ref_shape, indices_shape in zip(ref_shapes, indices_shapes): + num_updates = indices_shape[0] + ixdim = indices_shape[-1] + + indexable_area_shape = () + for i in range(ixdim): + indexable_area_shape += (ref_shape[i],) + all_indices = [ + list(coord) + for coord, _ in np.ndenumerate(np.empty(indexable_area_shape, vtype)) + ] + np.random.shuffle(all_indices) + indices = np.array(all_indices[:num_updates]) + + if num_updates > 1 and repeat_indices: + indices = indices[:num_updates // 2] + for _ in range(num_updates - num_updates // 2): + indices = np.append( + indices, [indices[np.random.randint(num_updates // 2)]], axis=0) + np.random.shuffle(indices) + indices = _AsType(indices[:num_updates], itype) + + updates_shape = (num_updates,) + for i in range(ixdim, len(ref_shape)): + updates_shape += (ref_shape[i],) + updates = _AsType(np.random.randn(*(updates_shape)), vtype) + + # Scatter via numpy + np_out = np_scatter(indices, updates, ref_shape) + # Scatter via tensorflow + tf_out = tf_scatter(indices, updates, ref_shape) + + self.assertAllClose(np_out, tf_out) + + def _VariableRankTests(self, np_scatter, tf_scatter): + for vtype in self.numeric_types: + for itype in set([np.int32, np.int64]).intersection(set(self.int_types)): + self._VariableRankTest(np_scatter, tf_scatter, vtype, itype) + + def _runScatterNd(self, indices, updates, shape): + with self.test_session(): + updates_placeholder = array_ops.placeholder(updates.dtype) + indices_placeholder = array_ops.placeholder(indices.dtype) + with self.test_scope(): + output = array_ops.scatter_nd(indices_placeholder, updates_placeholder, + shape) + feed_dict = {updates_placeholder: updates, indices_placeholder: indices} + return output.eval(feed_dict=feed_dict) + + def testSimple(self): + indices = np.array([[4], [3], [1], [7]], dtype=np.int32) + updates = np.array([9, 10, 11, 12], dtype=np.float32) + expected = np.array([0, 11, 0, 10, 9, 0, 0, 12], dtype=np.int32) + self.assertAllEqual(expected, self._runScatterNd(indices, updates, [8])) + + def testSimple2(self): + indices = np.array([[1, 0], [1, 1]], dtype=np.int32) + updates = np.array([11., 12.], dtype=np.float32) + expected = np.array([[0., 0.], [11., 12.], [0., 0.]], dtype=np.float32) + self.assertAllEqual(expected, self._runScatterNd(indices, updates, [3, 2])) + + def testSimple3(self): + indices = np.array([[1]], dtype=np.int32) + updates = np.array([[11., 12.]], dtype=np.float32) + expected = np.array([[0., 0.], [11., 12.], [0., 0.]]) + self.assertAllEqual(expected, self._runScatterNd(indices, updates, [3, 2])) + + def testVariableRankUpdate(self): + self._VariableRankTests(_NumpyUpdate, self._runScatterNd) + + def testExtraIndicesDimensions(self): + indices = np.zeros([1, 1, 2], np.int32) + updates = np.zeros([1, 1], np.int32) + expected = np.zeros([2, 2], dtype=np.int32) + self.assertAllEqual(expected, self._runScatterNd(indices, updates, [2, 2])) + + def testRank3InvalidShape1(self): + indices = np.zeros([3, 2, 2], np.int32) + updates = np.zeros([2, 2, 2], np.int32) + with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError, + "Must have updates.shape"): + self._runScatterNd(indices, updates, [2, 2, 2]) + + def testRank3InvalidShape2(self): + indices = np.zeros([2, 2, 1], np.int32) + updates = np.zeros([2, 2], np.int32) + with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError, + "Must have updates.shape"): + self._runScatterNd(indices, updates, [2, 2, 2]) + + def testScatterOutOfRange(self): + updates = np.array([-3, -4, -5]).astype(np.float32) + + # Indices all in range, no problem. + indices = np.array([[2], [0], [5]], dtype=np.int32) + self._runScatterNd(indices, updates, [6]) + + # Indices out of range should not fail. It produces implementation-defined + # output. + indices = np.array([[-1], [0], [5]], dtype=np.int32) + self._runScatterNd(indices, updates, [6]) + indices = np.array([[2], [0], [6]], dtype=np.int32) + self._runScatterNd(indices, updates, [6]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/compiler/tests/segment_reduction_ops_test.py b/tensorflow/compiler/tests/segment_reduction_ops_test.py index 260a04421b..23bc39cf3f 100644 --- a/tensorflow/compiler/tests/segment_reduction_ops_test.py +++ b/tensorflow/compiler/tests/segment_reduction_ops_test.py @@ -60,6 +60,14 @@ class SegmentReductionOpsTest(XLATestCase): np.array([0, 1, 2, 3, 4, 5], dtype=dtype), np.array([3, 0, 2, 1, 3, 3], dtype=np.int32), 4)) + def testUnsortedSegmentSum1DIndices1DDataNegativeIndices(self): + for dtype in self.numeric_types: + self.assertAllClose( + np.array([0, 3, 2, 5], dtype=dtype), + self.UnsortedSegmentSum( + np.array([0, 1, 2, 3, 4, 5], dtype=dtype), + np.array([3, -1, 2, 1, -1, 3], dtype=np.int32), 4)) + def testUnsortedSegmentSum1DIndices2DDataDisjoint(self): for dtype in self.numeric_types: data = np.array( diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index 4c6b29bd01..d83d576eda 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -64,6 +64,7 @@ tf_kernel_library( "reverse_op.cc", "reverse_sequence_op.cc", "scan_ops.cc", + "scatter_nd_op.cc", "segment_reduction_ops.cc", "select_op.cc", "sendrecv_ops.cc", @@ -96,12 +97,15 @@ tf_kernel_library( "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla/lib:batch_dot", "//tensorflow/compiler/tf2xla/lib:cholesky", + "//tensorflow/compiler/tf2xla/lib:scatter", "//tensorflow/compiler/tf2xla/lib:triangular_solve", "//tensorflow/compiler/tf2xla/lib:util", + "//tensorflow/compiler/tf2xla/lib:while_loop", "//tensorflow/compiler/tf2xla/ops:sendrecv_ops", "//tensorflow/compiler/xla:array4d", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:client_library", diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op.cc b/tensorflow/compiler/tf2xla/kernels/gather_op.cc index e9af1e9c2f..24f7bebdad 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/gather_op.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h" +#include "tensorflow/compiler/tf2xla/lib/while_loop.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_context.h" @@ -32,12 +33,12 @@ Status XlaGather(const xla::ComputationDataHandle& input, DataType dtype, DataType index_type, xla::ComputationBuilder* builder, xla::ComputationDataHandle* gather_output) { - // If the indices are N-dimensional, then the last dimension of indices should - // be of size N and correspond to the N indices. - int64 num_axes = 1; + // If the indices are N-dimensional, then the minor dimension of indices + // should be of size N and correspond to the N indices. + int64 num_index_dims = 1; if (indices_are_nd) { CHECK_GE(indices_shape.dims(), 1); - num_axes = indices_shape.dim_size(indices_shape.dims() - 1); + num_index_dims = indices_shape.dim_size(indices_shape.dims() - 1); indices_shape.RemoveLastDims(1); } @@ -46,15 +47,15 @@ Status XlaGather(const xla::ComputationDataHandle& input, // input, the output is returned with shape: // input.shape[:axis] + indices.shape + input.shape[axis+1:] - const int num_indices = indices_shape.num_elements(); + const int64 num_indices = indices_shape.num_elements(); TensorShape input_shape_pre_axis(input_shape); input_shape_pre_axis.RemoveDimRange(axis, input_shape.dims()); TensorShape input_shape_post_axis(input_shape); - input_shape_post_axis.RemoveDimRange(0, axis + num_axes); + input_shape_post_axis.RemoveDimRange(0, axis + num_index_dims); // Each slice of the input tensor has shape: // [, 1, ..., 1, ] TensorShape slice_shape(input_shape); - for (int64 i = 0; i < num_axes; ++i) { + for (int64 i = 0; i < num_index_dims; ++i) { slice_shape.set_dim(axis + i, 1); } @@ -79,7 +80,7 @@ Status XlaGather(const xla::ComputationDataHandle& input, return Status::OK(); } - for (int64 i = 0; i < num_axes; ++i) { + for (int64 i = 0; i < num_index_dims; ++i) { if (input_shape.dim_size(axis + i) == 0) { return errors::InvalidArgument("Gather dimension ", axis + i, " is of size zero in tensor with shape ", @@ -91,57 +92,30 @@ Status XlaGather(const xla::ComputationDataHandle& input, // iteration. If there is an axis dimension, we must leave it alone. std::vector flat_indices_shape = {num_indices}; if (indices_are_nd) { - flat_indices_shape.push_back(num_axes); + flat_indices_shape.push_back(num_index_dims); } // Specify the shape of the loop-carried Tensor tuple. - xla::PrimitiveType ptype; - TF_CHECK_OK(DataTypeToPrimitiveType(dtype, &ptype)); - xla::PrimitiveType idxtype; - TF_CHECK_OK(DataTypeToPrimitiveType(index_type, &idxtype)); - std::vector tuple_shapes( - {// The iteration counter i is a scalar, incremented each iteration. - xla::ShapeUtil::MakeShape(idxtype, {}), - // The input array has shape input_shape. Loop invariant. - xla::ShapeUtil::MakeShape(ptype, input_shape.dim_sizes()), - // The gather indices are reshaped to flat_indices_shape. Loop invariant. - xla::ShapeUtil::MakeShape(idxtype, flat_indices_shape), - // The output array, which is updated on each loop iteration. - xla::ShapeUtil::MakeShape(ptype, loop_out_shape.dim_sizes())}); - xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(tuple_shapes); // Construct the initial values of the loop-carried Tensors. - auto init_i = XlaHelpers::Zero(builder, index_type); + auto flat_indices = builder->Reshape(indices, flat_indices_shape); auto init_out = builder->Broadcast(XlaHelpers::Zero(builder, dtype), loop_out_shape.dim_sizes()); - auto flat_indices = builder->Reshape(indices, flat_indices_shape); - auto init = builder->Tuple({init_i, input, flat_indices, init_out}); - - // Construct the while loop condition (i < num_indices) - std::unique_ptr condb = - builder->CreateSubBuilder("GatherWhileCond"); - condb->Lt(condb->GetTupleElement( - condb->Parameter(0, tuple_shape, "GatherWhileTuple"), 0), - XlaHelpers::IntegerLiteral(condb.get(), index_type, num_indices)); - auto cond_status = condb->Build(); - auto cond = cond_status.ConsumeValueOrDie(); + auto init = {input, flat_indices, init_out}; // Construct the while loop body's function. The implementation of gather is: // for i in range(num_indices): // index = dynamic-slice(indices, i) // xi = dynamic-slice(input, index) // output = dynamic-update-slice(output, xi, i) - std::unique_ptr bodyb = - builder->CreateSubBuilder("GatherWhileBody"); - { - // The four loop carried values. - auto loop_tuple = bodyb->Parameter(0, tuple_shape, "GatherWhileTuple"); - auto i = bodyb->GetTupleElement(loop_tuple, 0); - auto input = bodyb->GetTupleElement(loop_tuple, 1); - auto indices = bodyb->GetTupleElement(loop_tuple, 2); - auto output = bodyb->GetTupleElement(loop_tuple, 3); - - auto zero_index = XlaHelpers::Zero(bodyb.get(), index_type); + auto body_fn = [&](xla::ComputationDataHandle i, + gtl::ArraySlice loop_vars, + xla::ComputationBuilder* bodyb) { + auto input = loop_vars[0]; + auto indices = loop_vars[1]; + auto output = loop_vars[2]; + + auto zero_index = XlaHelpers::Zero(bodyb, index_type); // Slice the i-th index from the indices array. xla::ComputationDataHandle index; @@ -150,7 +124,7 @@ Status XlaGather(const xla::ComputationDataHandle& input, // Slice out the entire nd index, if applicable. indices_offset = bodyb->Pad(indices_offset, zero_index, xla::MakeEdgePaddingConfig({{0, 1}})); - index = bodyb->DynamicSlice(indices, indices_offset, {1, num_axes}); + index = bodyb->DynamicSlice(indices, indices_offset, {1, num_index_dims}); index = bodyb->Collapse(index, {0, 1}); } else { index = bodyb->DynamicSlice(indices, indices_offset, {1}); @@ -174,16 +148,16 @@ Status XlaGather(const xla::ComputationDataHandle& input, // Update the output Tensor auto updated_output = bodyb->DynamicUpdateSlice(output, slice_i, out_index); - bodyb->Tuple({bodyb->Add(i, XlaHelpers::One(bodyb.get(), index_type)), - input, indices, updated_output}); - } - auto body_status = bodyb->Build(); - auto body = body_status.ConsumeValueOrDie(); + return std::vector{input, indices, + updated_output}; + }; // Construct the While loop, extract and reshape the output. - auto gather_while = builder->While(cond, body, init); - auto result = builder->GetTupleElement(gather_while, 3); - *gather_output = builder->Reshape(result, out_shape.dim_sizes()); + auto num_indices_value = + XlaHelpers::IntegerLiteral(builder, index_type, num_indices); + TF_ASSIGN_OR_RETURN(auto outputs, XlaForEachIndex(num_indices_value, body_fn, + init, "gather", builder)); + *gather_output = builder->Reshape(outputs[2], out_shape.dim_sizes()); return Status::OK(); } @@ -250,9 +224,10 @@ class GatherNdOp : public XlaOpKernel { errors::InvalidArgument("params must be at least a vector")); OP_REQUIRES(context, TensorShapeUtils::IsVectorOrHigher(indices_shape), errors::InvalidArgument("indices must be at least a vector")); - const int64 num_axes = indices_shape.dim_size(indices_shape.dims() - 1); + const int64 num_index_dims = + indices_shape.dim_size(indices_shape.dims() - 1); OP_REQUIRES( - context, num_axes <= params_shape.dims(), + context, num_index_dims <= params_shape.dims(), errors::InvalidArgument( "index innermost dimension length must be <= params rank; saw: ", indices_shape.dim_size(indices_shape.dims() - 1), " vs. ", diff --git a/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc b/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc new file mode 100644 index 0000000000..8433a29c4e --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/scatter_nd_op.cc @@ -0,0 +1,121 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/scatter.h" +#include "tensorflow/compiler/tf2xla/shape_util.h" +#include "tensorflow/compiler/tf2xla/type_util.h" +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/core/framework/kernel_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" + +namespace tensorflow { +namespace { + +// Check whether updates.shape = indices.shape[:batch_dim] + +// buffer_shape[num_index_dims:] +Status ValidateUpdateShape(const TensorShape& buffer_shape, + const TensorShape& indices_shape, + const TensorShape& updates_shape) { + if (indices_shape.dims() < 1) { + return errors::InvalidArgument( + "indices shape must have >= 1 dimension; got ", + indices_shape.DebugString()); + } + + const int64 num_index_dims = indices_shape.dim_size(indices_shape.dims() - 1); + const int64 batch_dim = indices_shape.dims() - 1; + + auto shape_err = [&]() { + return errors::InvalidArgument( + "Must have updates.shape = indices.shape[:batch_dim] + ", + "buffer_shape[num_index_dims:], got updates.shape: ", + updates_shape.DebugString(), + ", indices.shape: ", indices_shape.DebugString(), + ", buffer_shape: ", buffer_shape.DebugString(), + ", num_index_dims: ", num_index_dims, ", and batch_dim: ", batch_dim); + }; + + if (updates_shape.dims() < batch_dim) return shape_err(); + if (buffer_shape.dims() < + num_index_dims + (updates_shape.dims() - batch_dim)) { + return shape_err(); + } + if (updates_shape.dims() != + batch_dim + buffer_shape.dims() - num_index_dims) { + return shape_err(); + } + for (int d = 0; d < batch_dim; ++d) { + if (updates_shape.dim_size(d) != indices_shape.dim_size(d)) { + return shape_err(); + } + } + for (int d = 0; d < updates_shape.dims() - batch_dim; ++d) { + if (updates_shape.dim_size(d + batch_dim) != + buffer_shape.dim_size(d + num_index_dims)) { + return shape_err(); + } + } + return Status::OK(); +} + +class ScatterNdOp : public XlaOpKernel { + public: + explicit ScatterNdOp(OpKernelConstruction* context) : XlaOpKernel(context) {} + + void Compile(XlaOpKernelContext* context) override { + DataType dtype = context->input_type(1); + + TensorShape indices_shape = context->InputShape(0); + TensorShape updates_shape = context->InputShape(1); + + TensorShape buffer_shape; + OP_REQUIRES_OK(context, context->ConstantInputAsShape(2, &buffer_shape)); + + OP_REQUIRES( + context, TensorShapeUtils::IsVectorOrHigher(buffer_shape), + errors::InvalidArgument("Output must be at least 1-D, ", + "got shape: ", buffer_shape.DebugString())); + + OP_REQUIRES( + context, + buffer_shape.num_elements() > 0 || (indices_shape.num_elements() == 0 && + updates_shape.num_elements() == 0), + errors::InvalidArgument( + "Indices and updates specified for empty output. indices shape: ", + indices_shape.DebugString())); + + OP_REQUIRES_OK(context, ValidateUpdateShape(buffer_shape, indices_shape, + updates_shape)); + + xla::ComputationBuilder* builder = context->builder(); + auto buffer = builder->Broadcast(XlaHelpers::Zero(builder, dtype), + buffer_shape.dim_sizes()); + auto indices = context->Input(0); + auto updates = context->Input(1); + auto result = + XlaScatter(buffer, updates, indices, + /*indices_are_vectors=*/true, /*combiner=*/{}, builder); + OP_REQUIRES_OK(context, result.status()); + context->SetOutput(0, result.ValueOrDie()); + } +}; + +REGISTER_XLA_OP(Name("ScatterNd").CompileTimeConstInput("shape"), ScatterNdOp); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/scatter_op_helpers.h b/tensorflow/compiler/tf2xla/kernels/scatter_op_helpers.h deleted file mode 100644 index a5ab7de17a..0000000000 --- a/tensorflow/compiler/tf2xla/kernels/scatter_op_helpers.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -// Helper methods for XLA Scatter Ops. -#ifndef TENSORFLOW_COMPILER_TF2XLA_KERNELS_SCATTER_OP_HELPERS_H_ -#define TENSORFLOW_COMPILER_TF2XLA_KERNELS_SCATTER_OP_HELPERS_H_ - -#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" -#include "tensorflow/compiler/xla/client/client_library.h" -#include "tensorflow/compiler/xla/client/computation_builder.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/util/bcast.h" - -namespace tensorflow { - -// Adds to builder an XLA computation that performs a scatter-add of input (of -// shape input_shape) keyed on indices (of shape indices_shape). The shape -// of the Tensor returned by this is num_segments input_shape[indices.dims():] -// -static xla::ComputationDataHandle XlaComputeScatterAddDynamicSlice( - XlaOpKernelContext* ctx, const xla::ComputationDataHandle& input, - const TensorShape& input_shape, const xla::ComputationDataHandle& indices, - const TensorShape& indices_shape, int64 num_segments, DataType dtype, - xla::ComputationBuilder* builder); - -} // namespace tensorflow - -#endif // TENSORFLOW_COMPILER_TF2XLA_KERNELS_SCATTER_OP_HELPERS_H_ diff --git a/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc b/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc index c220edd588..80d6df6c48 100644 --- a/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/segment_reduction_ops.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,125 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include -#include "tensorflow/compiler/tf2xla/kernels/cwise_ops.h" -#include "tensorflow/compiler/tf2xla/shape_util.h" -#include "tensorflow/compiler/tf2xla/type_util.h" +#include "tensorflow/compiler/tf2xla/lib/scatter.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/compiler/xla/client/computation_builder.h" -#include "tensorflow/compiler/xla/literal_util.h" -#include "tensorflow/core/framework/kernel_def_builder.h" -#include "tensorflow/core/framework/types.h" namespace tensorflow { - -xla::ComputationDataHandle XlaComputeScatterAddDynamicSlice( - XlaOpKernelContext* ctx, const xla::ComputationDataHandle& input, - const TensorShape& input_shape, const xla::ComputationDataHandle& indices, - const TensorShape& indices_shape, int64 num_segments, DataType dtype, - xla::ComputationBuilder* builder) { - // Flatten data for dynamic indexing via indices_1d. - TensorShape input_shape_i(input_shape); - for (int64 d = 0; d < indices_shape.dims(); ++d) { - input_shape_i.RemoveDim(0); - } - TensorShape flat_shape({indices_shape.num_elements()}); - flat_shape.AppendShape(input_shape_i); - - // output is same as flattened input shape with dim_size(0) = num_segments. - TensorShape out_shape(flat_shape); - out_shape.set_dim(0, num_segments); - - // Slices from the input data are same shape as the input data, except dim 0. - TensorShape slice_shape(flat_shape); - slice_shape.set_dim(0, 1); - TensorShape loop_out_slice_shape(out_shape); - loop_out_slice_shape.set_dim(0, 1); - - // Construct the initial values of the loop-carried variables - // Flatten the indices into 1-D for ease of iteration. - auto indices_1d = builder->Reshape(indices, {indices_shape.num_elements()}); - // Flatten the data for ease of indexing via values in indices_1d. - auto data_flat = builder->Reshape(input, flat_shape.dim_sizes()); - - auto init_i = builder->ConstantR0(0); - auto init_out = builder->Broadcast(XlaHelpers::Zero(builder, dtype), - out_shape.dim_sizes()); - - xla::PrimitiveType ptype; - TF_CHECK_OK(DataTypeToPrimitiveType(dtype, &ptype)); - - std::vector tuple_shapes( - {// The loop iteration counter is a scalar, incremented each iteration. - xla::ShapeUtil::MakeShape(xla::S32, {}), - // The flattened input data is loop invariant. - xla::ShapeUtil::MakeShape(ptype, flat_shape.dim_sizes()), - // The scatter indices tensor is loop invariant. - xla::ShapeUtil::MakeShape(xla::S32, {indices_shape.num_elements()}), - // The output data array is updated each loop iteration. - xla::ShapeUtil::MakeShape(ptype, out_shape.dim_sizes())}); - xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(tuple_shapes); - - auto init = builder->Tuple({init_i, data_flat, indices_1d, init_out}); - - // Construct the while loop condition (i < num_indices) - xla::ComputationBuilder condb(ctx->builder()->client(), - "ScatterAddWhileCond"); - condb.Lt(condb.GetTupleElement( - condb.Parameter(0, tuple_shape, "ScatterAddWhileTuple"), 0), - condb.ConstantR0(indices_shape.num_elements())); - auto cond_status = condb.Build(); - auto cond = cond_status.ConsumeValueOrDie(); - - // Construct the while loop body's function. The implementation of scatter is: - // for i in range(num_indices): - // index = dynamic-slice(indices, i) - // xi = dynamic-slice(input, i) - // output = dynamic-update-slice(output, xi, index) - xla::ComputationBuilder bodyb(ctx->builder()->client(), - "ScatterAddWhileBody"); - { - auto input_tuple = bodyb.Parameter(0, tuple_shape, "ScatterAddWhileTuple"); - auto i = bodyb.GetTupleElement(input_tuple, 0); - auto data = bodyb.GetTupleElement(input_tuple, 1); - auto idcs = bodyb.GetTupleElement(input_tuple, 2); - auto output = bodyb.GetTupleElement(input_tuple, 3); - - // Index into the data array at i. - auto zero = bodyb.ConstantR1({0}); - std::vector index_vals(flat_shape.dims(), zero); - index_vals[0] = bodyb.Reshape(i, {1}); - auto index = bodyb.ConcatInDim(index_vals, 0); - - auto data_slice = - bodyb.Reshape(bodyb.DynamicSlice(data, index, slice_shape.dim_sizes()), - loop_out_slice_shape.dim_sizes()); - - // Index into the output array. - std::vector out_index_vals(out_shape.dims(), - zero); - out_index_vals[0] = bodyb.DynamicSlice(idcs, bodyb.Reshape(i, {1}), {1}); - auto out_index = bodyb.ConcatInDim(out_index_vals, 0); - - // Slice the output array, update value, and update the output slice. - auto updated_output = bodyb.DynamicUpdateSlice( - output, - bodyb.Add(data_slice, - bodyb.DynamicSlice(output, out_index, - loop_out_slice_shape.dim_sizes())), - out_index); - - auto ip1 = bodyb.Add(i, bodyb.ConstantR0(1)); - bodyb.Tuple({ip1, data, idcs, updated_output}); - } - auto body_status = bodyb.Build(); - auto body = body_status.ConsumeValueOrDie(); - - auto gather_while = builder->While(cond, body, init); - return builder->GetTupleElement(gather_while, 3); -} - namespace { class UnsortedSegmentSum : public XlaOpKernel { @@ -153,10 +41,10 @@ class UnsortedSegmentSum : public XlaOpKernel { // as data with the first indices.rank dimensions are replaced // by a single dimension with size num_segments. auto data = ctx->Input(0); - auto data_shape = ctx->InputShape(0); + TensorShape data_shape = ctx->InputShape(0); auto indices = ctx->Input(1); - auto indices_shape = ctx->InputShape(1); + TensorShape indices_shape = ctx->InputShape(1); int64 num_segments; OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(2, &num_segments)); @@ -174,10 +62,21 @@ class UnsortedSegmentSum : public XlaOpKernel { d, " differs ", data_shape.dim_size(d), " vs. ", indices_shape.dim_size(d))); } - auto result = XlaComputeScatterAddDynamicSlice( - ctx, data, data_shape, indices, indices_shape, num_segments, dtype_, - ctx->builder()); - ctx->SetOutput(0, result); + xla::ComputationBuilder* builder = ctx->builder(); + TensorShape buffer_shape = data_shape; + buffer_shape.RemoveDimRange(0, indices_shape.dims()); + buffer_shape.InsertDim(0, num_segments); + auto buffer = builder->Broadcast(XlaHelpers::Zero(builder, dtype_), + buffer_shape.dim_sizes()); + + auto combiner = + [](xla::ComputationDataHandle a, xla::ComputationDataHandle b, + xla::ComputationBuilder* builder) { return builder->Add(a, b); }; + + auto result = XlaScatter(buffer, /*updates=*/data, indices, + /*indices_are_vectors=*/false, combiner, builder); + OP_REQUIRES_OK(ctx, result.status()); + ctx->SetOutput(0, result.ValueOrDie()); } private: diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD index d184f59e01..da2fdd1d34 100644 --- a/tensorflow/compiler/tf2xla/lib/BUILD +++ b/tensorflow/compiler/tf2xla/lib/BUILD @@ -49,6 +49,25 @@ cc_library( ], ) +cc_library( + name = "scatter", + srcs = ["scatter.cc"], + hdrs = ["scatter.h"], + deps = [ + ":util", + ":while_loop", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/compiler/xla/client/lib:arithmetic", + "//tensorflow/core:lib", + ], +) + cc_library( name = "triangular_solve", srcs = ["triangular_solve.cc"], @@ -107,6 +126,20 @@ cc_library( ], ) +cc_library( + name = "while_loop", + srcs = ["while_loop.cc"], + hdrs = ["while_loop.h"], + deps = [ + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:computation", + "//tensorflow/compiler/xla/client:computation_builder", + "//tensorflow/core:lib", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/tf2xla/lib/scatter.cc b/tensorflow/compiler/tf2xla/lib/scatter.cc new file mode 100644 index 0000000000..e8026ecc8d --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/scatter.cc @@ -0,0 +1,190 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/scatter.h" + +#include +#include + +#include "tensorflow/compiler/tf2xla/lib/util.h" +#include "tensorflow/compiler/tf2xla/lib/while_loop.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/array_slice.h" + +namespace tensorflow { + +xla::StatusOr XlaScatter( + const xla::ComputationDataHandle& buffer, + const xla::ComputationDataHandle& updates, + const xla::ComputationDataHandle& indices, bool indices_are_vectors, + const std::function& combiner, + xla::ComputationBuilder* builder) { + TF_ASSIGN_OR_RETURN(std::unique_ptr buffer_shape, + builder->GetShape(buffer)); + TF_ASSIGN_OR_RETURN(std::unique_ptr updates_shape, + builder->GetShape(updates)); + TF_ASSIGN_OR_RETURN(std::unique_ptr indices_shape, + builder->GetShape(indices)); + gtl::ArraySlice indices_dims = + xla::AsInt64Slice(indices_shape->dimensions()); + gtl::ArraySlice buffer_dims = + xla::AsInt64Slice(buffer_shape->dimensions()); + + // If the indices are N-dimensional, the minor dimension of indices contains + // the indices to update. Otherwise the indices are all scalars. + int64 num_index_dims = 1; + if (indices_are_vectors) { + TF_RET_CHECK(!indices_dims.empty()); + num_index_dims = indices_dims.back(); + if (num_index_dims > xla::ShapeUtil::Rank(*buffer_shape)) { + return errors::InvalidArgument( + "The size of the minor dimension of the indices (shape: ", + xla::ShapeUtil::HumanString(*indices_shape), + ") must be <= the rank of the buffer (shape: ", + xla::ShapeUtil::HumanString(*buffer_shape), ")"); + } + indices_dims.pop_back(); + } + + int64 num_indices = 1; + for (int64 dim : indices_dims) { + num_indices *= dim; + } + + // Degenerate case: nothing to update. Return the buffer unchanged. + if (num_indices == 0) { + return buffer; + } + + // If any of the indexed dimensions are zero in the buffer, the update cannot + // succeed since it updates a slice of size 1. + for (int64 i = 0; i < num_index_dims; ++i) { + if (xla::ShapeUtil::GetDimension(*buffer_shape, i) == 0) { + return errors::InvalidArgument( + "Scatter dimension ", i, " is of size zero in tensor with shape ", + xla::ShapeUtil::HumanString(*buffer_shape)); + } + } + + // Shape of the non-indexed dimensions of the buffer. + std::vector buffer_shape_post_axes( + buffer_dims.begin() + num_index_dims, buffer_dims.end()); + + // Flatten the major dimensions of indices and updates into a single dimension + // for ease of iteration. + std::vector flat_indices_shape({num_indices}); + if (indices_are_vectors) { + flat_indices_shape.push_back(num_index_dims); + } + + std::vector flat_updates_shape({num_indices}); + flat_updates_shape.insert(flat_updates_shape.end(), + buffer_shape_post_axes.begin(), + buffer_shape_post_axes.end()); + + // Construct the initial values of the loop-carried Tensors. + auto flat_indices = builder->Reshape(indices, flat_indices_shape); + auto flat_updates = builder->Reshape(updates, flat_updates_shape); + auto init = {flat_indices, flat_updates, buffer}; + + // Constructs the loop body. The implementation of scatter is essentially: + // for i in range(num_indices): + // index = dynamic-slice(indices, i) + // update = dynamic-slice(updates, i) + // buffer = dynamic-update-slice(buffer, update, index) + auto body_fn = [&](xla::ComputationDataHandle i, + gtl::ArraySlice loop_vars, + xla::ComputationBuilder* body_builder) { + auto indices = loop_vars[0]; + auto updates = loop_vars[1]; + auto buffer = loop_vars[2]; + + auto zero_index = body_builder->ConstantLiteral( + xla::Literal::Zero(indices_shape->element_type())); + + // Slice the i-th index from the indices array. + xla::ComputationDataHandle index; + auto indices_offset = body_builder->Reshape(i, {1}); + if (indices_are_vectors) { + indices_offset = body_builder->Pad(indices_offset, zero_index, + xla::MakeEdgePaddingConfig({{0, 1}})); + + index = body_builder->DynamicSlice(indices, indices_offset, + {1, num_index_dims}); + index = body_builder->Collapse(index, {0, 1}); + } else { + index = body_builder->DynamicSlice(indices, indices_offset, {1}); + } + + // Discard updates with negative indices, since some users expect this. + auto index_in_range = + body_builder->ReduceAll(body_builder->Le(zero_index, index), + body_builder->ConstantR0(true), + xla::CreateScalarAndComputation(body_builder)); + + index = body_builder->Pad( + index, zero_index, + xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}})); + + // Slice the i-th index from the updates array. + auto updates_offset = body_builder->Reshape(i, {1}); + updates_offset = body_builder->Pad( + updates_offset, zero_index, + xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}})); + std::vector flat_updates_slice_shape({1}); + flat_updates_slice_shape.insert(flat_updates_slice_shape.end(), + buffer_shape_post_axes.begin(), + buffer_shape_post_axes.end()); + auto update = body_builder->DynamicSlice(updates, updates_offset, + flat_updates_slice_shape); + + // Unflatten the major (iteration) dimensions of the slice to their original + // shape. + std::vector updates_slice_shape(num_index_dims, 1); + updates_slice_shape.insert(updates_slice_shape.end(), + buffer_shape_post_axes.begin(), + buffer_shape_post_axes.end()); + update = body_builder->Reshape(update, updates_slice_shape); + + // Apply the update to the buffer. If there is a combiner, use it to merge + // the current values with the update. + if (combiner) { + auto current_value = + body_builder->DynamicSlice(buffer, index, updates_slice_shape); + update = combiner(current_value, update, body_builder); + } + // Apply the update if it is in range. + buffer = body_builder->Select( + index_in_range, body_builder->DynamicUpdateSlice(buffer, update, index), + buffer); + + return std::vector{indices, updates, buffer}; + }; + + xla::ComputationDataHandle num_indices_value = + IntegerLiteral(builder, indices_shape->element_type(), num_indices); + TF_ASSIGN_OR_RETURN(auto outputs, XlaForEachIndex(num_indices_value, body_fn, + init, "scatter", builder)); + return outputs[2]; +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/scatter.h b/tensorflow/compiler/tf2xla/lib/scatter.h new file mode 100644 index 0000000000..41e6d3b195 --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/scatter.h @@ -0,0 +1,53 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_SCATTER_H_ +#define TENSORFLOW_COMPILER_TF2XLA_LIB_SCATTER_H_ + +#include + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace tensorflow { + +// Builds an XLA computation that performs a scatter operation on `buffer`, +// returning an updated buffer. +// For each i0, i1, ..., sets +// buffer[indices[i0, i1, ...], ...] := updates[i0, i1, ...] +// +// If `indices_are_vectors` is false, then each index in indices is a scalar, +// and the shape of `indices` must be a prefix of the shape of updates. +// Otherwise, `indices_are_vectors`, then indices are multidimensional and the +// minor dimension of `indices` represents a vector of indices. +// +// If any indices are negative, the corresponding update is discarded. +// +// If a `combiner` is provided, updates are combined with the existing values in +// the buffer using the combiner function. Otherwise, the updates replace the +// existing values. The order of updates is implementation-defined. +xla::StatusOr XlaScatter( + const xla::ComputationDataHandle& buffer, + const xla::ComputationDataHandle& updates, + const xla::ComputationDataHandle& indices, bool indices_are_vectors, + const std::function& combiner, + xla::ComputationBuilder* builder); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_LIB_SCATTER_H_ diff --git a/tensorflow/compiler/tf2xla/lib/util.cc b/tensorflow/compiler/tf2xla/lib/util.cc index 9b7492f8cf..f579669bbd 100644 --- a/tensorflow/compiler/tf2xla/lib/util.cc +++ b/tensorflow/compiler/tf2xla/lib/util.cc @@ -57,6 +57,61 @@ xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder, } } +xla::ComputationDataHandle IntegerLiteral(xla::ComputationBuilder* builder, + xla::PrimitiveType type, + int64 value) { + xla::Literal literal; + switch (type) { + case xla::U8: + literal = std::move(*xla::Literal::CreateR0(value)); + break; + case xla::U32: + literal = std::move(*xla::Literal::CreateR0(value)); + break; + case xla::U64: + literal = std::move(*xla::Literal::CreateR0(value)); + break; + case xla::S8: + literal = std::move(*xla::Literal::CreateR0(value)); + break; + case xla::S32: + literal = std::move(*xla::Literal::CreateR0(value)); + break; + case xla::S64: + literal = std::move(*xla::Literal::CreateR0(value)); + break; + case xla::F32: + literal = std::move(*xla::Literal::CreateR0(value)); + break; + case xla::F64: + literal = std::move(*xla::Literal::CreateR0(value)); + break; + case xla::C64: + literal = std::move(*xla::Literal::CreateR0(value)); + break; + case xla::PRED: + LOG(FATAL) << "pred element type is not integral"; + case xla::S16: + case xla::U16: + LOG(FATAL) << "u16/s16 literals not yet implemented"; + case xla::BF16: + literal = std::move( + *xla::Literal::CreateR0(static_cast(value))); + break; + case xla::F16: + literal = std::move( + *xla::Literal::CreateR0(static_cast(value))); + break; + case xla::TUPLE: + LOG(FATAL) << "tuple element type is not integral"; + case xla::OPAQUE: + LOG(FATAL) << "opaque element type is not integral"; + default: + LOG(FATAL) << "unhandled element type " << type; + } + return builder->ConstantLiteral(literal); +} + xla::StatusOr SliceInMinorDims( xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, gtl::ArraySlice start, gtl::ArraySlice end) { diff --git a/tensorflow/compiler/tf2xla/lib/util.h b/tensorflow/compiler/tf2xla/lib/util.h index 7f93102ee7..51f8baaf00 100644 --- a/tensorflow/compiler/tf2xla/lib/util.h +++ b/tensorflow/compiler/tf2xla/lib/util.h @@ -32,6 +32,11 @@ xla::ComputationDataHandle Zeros(xla::ComputationBuilder* builder, xla::ComputationDataHandle FloatLiteral(xla::ComputationBuilder* builder, xla::PrimitiveType type, double value); +// Returns a integer scalar constant of 'type' with 'value'. +// If 'type' is complex, returns a real value with zero imaginary component. +xla::ComputationDataHandle IntegerLiteral(xla::ComputationBuilder* builder, + xla::PrimitiveType type, int64 value); + // Performs a slice in the minor dimensions of a Tensor. xla::StatusOr SliceInMinorDims( xla::ComputationBuilder* builder, const xla::ComputationDataHandle& x, diff --git a/tensorflow/compiler/tf2xla/lib/while_loop.cc b/tensorflow/compiler/tf2xla/lib/while_loop.cc new file mode 100644 index 0000000000..d35f6cd13f --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/while_loop.cc @@ -0,0 +1,129 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/lib/while_loop.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" + +namespace tensorflow { + +xla::StatusOr> XlaWhileLoop( + const LoopConditionFunction& condition_function, + const LoopBodyFunction& body_function, + gtl::ArraySlice initial_values, + StringPiece name, xla::ComputationBuilder* builder) { + int arity = initial_values.size(); + std::vector var_shapes; + var_shapes.reserve(arity); + for (const xla::ComputationDataHandle& input : initial_values) { + TF_ASSIGN_OR_RETURN(auto shape, builder->GetShape(input)); + var_shapes.push_back(std::move(*shape)); + } + xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(var_shapes); + + // Unpacks a tuple into its component parts. + auto unpack_tuple = [](xla::ComputationDataHandle tuple, int arity, + xla::ComputationBuilder* builder) { + std::vector elements(arity); + for (int i = 0; i < arity; ++i) { + elements[i] = builder->GetTupleElement(tuple, i); + } + return elements; + }; + + // Build the condition. + std::unique_ptr cond_builder = + builder->CreateSubBuilder(strings::StrCat(name, "_condition")); + { + auto parameter = cond_builder->Parameter(0, tuple_shape, "parameter"); + + TF_ASSIGN_OR_RETURN( + auto result, + condition_function(unpack_tuple(parameter, arity, cond_builder.get()), + cond_builder.get())); + TF_RETURN_IF_ERROR(cond_builder->SetReturnValue(result)); + } + TF_ASSIGN_OR_RETURN(auto cond, cond_builder->Build()); + + // Build the body. + std::unique_ptr body_builder = + builder->CreateSubBuilder(strings::StrCat(name, "_body")); + { + auto parameter = body_builder->Parameter(0, tuple_shape, "parameter"); + + TF_ASSIGN_OR_RETURN( + auto result, + body_function(unpack_tuple(parameter, arity, body_builder.get()), + body_builder.get())); + + TF_RET_CHECK(result.size() == initial_values.size()); + body_builder->Tuple(result); + } + TF_ASSIGN_OR_RETURN(auto body, body_builder->Build()); + + auto outputs = builder->While(cond, body, builder->Tuple(initial_values)); + + return unpack_tuple(outputs, arity, builder); +} + +xla::StatusOr> XlaForEachIndex( + const xla::ComputationDataHandle& num_iterations, + const ForEachIndexBodyFunction& body_function, + gtl::ArraySlice initial_values, + StringPiece name, xla::ComputationBuilder* builder) { + TF_ASSIGN_OR_RETURN(std::unique_ptr num_iterations_shape, + builder->GetShape(num_iterations)); + TF_RET_CHECK(xla::ShapeUtil::IsScalar(*num_iterations_shape)); + xla::PrimitiveType num_iterations_type = num_iterations_shape->element_type(); + + auto while_cond_fn = [&](gtl::ArraySlice values, + xla::ComputationBuilder* cond_builder) + -> xla::StatusOr { + return cond_builder->Lt(values[0], values[1]); + }; + auto while_body_fn = [&](gtl::ArraySlice values, + xla::ComputationBuilder* body_builder) + -> xla::StatusOr> { + xla::ComputationDataHandle iteration = values[0]; + + std::vector updated_values; + updated_values.reserve(values.size()); + updated_values.push_back(body_builder->Add( + iteration, + body_builder->ConstantLiteral(xla::Literal::One(num_iterations_type)))); + updated_values.push_back(values[1]); + + values.remove_prefix(2); + TF_ASSIGN_OR_RETURN(std::vector body_outputs, + body_function(iteration, values, body_builder)); + updated_values.insert(updated_values.end(), body_outputs.begin(), + body_outputs.end()); + return updated_values; + }; + + std::vector values; + values.reserve(initial_values.size() + 2); + values.push_back( + builder->ConstantLiteral(xla::Literal::Zero(num_iterations_type))); + values.push_back(num_iterations); + values.insert(values.end(), initial_values.begin(), initial_values.end()); + + TF_ASSIGN_OR_RETURN(values, XlaWhileLoop(while_cond_fn, while_body_fn, values, + name, builder)); + values.erase(values.begin(), values.begin() + 2); + return values; +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/while_loop.h b/tensorflow/compiler/tf2xla/lib/while_loop.h new file mode 100644 index 0000000000..562a589fbf --- /dev/null +++ b/tensorflow/compiler/tf2xla/lib/while_loop.h @@ -0,0 +1,74 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_TF2XLA_LIB_WHILE_LOOP_H_ +#define TENSORFLOW_COMPILER_TF2XLA_LIB_WHILE_LOOP_H_ + +#include +#include + +#include "tensorflow/compiler/xla/client/computation.h" +#include "tensorflow/compiler/xla/client/computation_builder.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/gtl/array_slice.h" + +namespace tensorflow { + +// Function that builds a loop condition. Takes as input a sequence of input +// values, and returns a boolean value representing if the condition succeeds. +typedef std::function( + gtl::ArraySlice, xla::ComputationBuilder*)> + LoopConditionFunction; + +// Function that builds a loop body. Takes as input a sequence of input values +// and returns a sequence of output values. +typedef std::function>( + gtl::ArraySlice, xla::ComputationBuilder*)> + LoopBodyFunction; + +// Helper function for building an XLA while loop, where the values carried by +// the loop are a tuple of values, e.g., (a, b, c): +// while( +// condition: (a, b, c) -> bool, +// body: (a, b, c) -> (a, b, c) +// init: (a, b, c) +// ) +// 'name' is a descriptive name for the loop. +xla::StatusOr> XlaWhileLoop( + const LoopConditionFunction& condition_function, + const LoopBodyFunction& body_function, + gtl::ArraySlice initial_values, + StringPiece name, xla::ComputationBuilder* builder); + +// Builds an XLA loop that repeats a computation `num_iterations` times. +// +// The body function (ForEachIndexBodyFunction) takes as input a pair of +// (current iteration number, loop-carried values), and returns an updated +// vector of the loop-carried values. +typedef std::function>( + xla::ComputationDataHandle, gtl::ArraySlice, + xla::ComputationBuilder*)> + ForEachIndexBodyFunction; + +xla::StatusOr> XlaForEachIndex( + const xla::ComputationDataHandle& num_iterations, + const ForEachIndexBodyFunction& body_function, + gtl::ArraySlice initial_values, + StringPiece name, xla::ComputationBuilder* builder); + +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_TF2XLA_LIB_WHILE_LOOP_H_ diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index 77e2416267..f048662953 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -135,58 +135,9 @@ xla::ComputationDataHandle XlaHelpers::Epsilon(xla::ComputationBuilder* b, xla::ComputationDataHandle XlaHelpers::IntegerLiteral( xla::ComputationBuilder* b, DataType data_type, int64 value) { - xla::Literal literal; xla::PrimitiveType type; TF_CHECK_OK(DataTypeToPrimitiveType(data_type, &type)); - switch (type) { - case xla::U8: - literal = std::move(*xla::Literal::CreateR0(value)); - break; - case xla::U32: - literal = std::move(*xla::Literal::CreateR0(value)); - break; - case xla::U64: - literal = std::move(*xla::Literal::CreateR0(value)); - break; - case xla::S8: - literal = std::move(*xla::Literal::CreateR0(value)); - break; - case xla::S32: - literal = std::move(*xla::Literal::CreateR0(value)); - break; - case xla::S64: - literal = std::move(*xla::Literal::CreateR0(value)); - break; - case xla::F32: - literal = std::move(*xla::Literal::CreateR0(value)); - break; - case xla::F64: - literal = std::move(*xla::Literal::CreateR0(value)); - break; - case xla::C64: - literal = std::move(*xla::Literal::CreateR0(value)); - break; - case xla::PRED: - LOG(FATAL) << "pred element type is not integral"; - case xla::S16: - case xla::U16: - LOG(FATAL) << "u16/s16 literals not yet implemented"; - case xla::BF16: - literal = std::move( - *xla::Literal::CreateR0(static_cast(value))); - break; - case xla::F16: - literal = std::move( - *xla::Literal::CreateR0(static_cast(value))); - break; - case xla::TUPLE: - LOG(FATAL) << "tuple element type is not integral"; - case xla::OPAQUE: - LOG(FATAL) << "opaque element type is not integral"; - default: - LOG(FATAL) << "unhandled element type " << type; - } - return b->ConstantLiteral(literal); + return ::tensorflow::IntegerLiteral(b, type, value); } xla::ComputationDataHandle XlaHelpers::FloatLiteral(xla::ComputationBuilder* b, -- GitLab From 465a45cd3b717908ecbae72b824c91f44c2cdce0 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 12 Feb 2018 21:56:19 -0800 Subject: [PATCH 106/629] [XLA:CPU] Implement vectorized Log in LLVM IR This was the last vectorized intrinsic for which we had to call into C++ so also remove the associated machinery. PiperOrigin-RevId: 185482962 --- tensorflow/compiler/aot/tfcompile.bzl | 3 - tensorflow/compiler/xla/service/cpu/BUILD | 43 ----- .../xla/service/cpu/compiler_functor.cc | 90 +--------- .../xla/service/cpu/compiler_functor.h | 13 -- .../compiler/xla/service/cpu/cpu_compiler.cc | 3 +- .../xla/service/cpu/cpu_runtime_avx.cc | 37 ---- .../xla/service/cpu/cpu_runtime_avx.h | 59 ------ .../xla/service/cpu/cpu_runtime_neon.cc | 46 ----- .../xla/service/cpu/cpu_runtime_neon.h | 62 ------- .../xla/service/cpu/cpu_runtime_sse4_1.cc | 40 ----- .../xla/service/cpu/cpu_runtime_sse4_1.h | 59 ------ .../xla/service/cpu/llvm_ir_runtime.cc | 168 ++++++++++++++++-- .../xla/service/cpu/llvm_ir_runtime.h | 2 + .../xla/service/cpu/simple_orc_jit.cc | 46 +---- .../xla/service/cpu/vector_support_library.cc | 85 ++++++++- .../xla/service/cpu/vector_support_library.h | 77 ++++++-- .../xla/tests/array_elementwise_ops_test.cc | 38 ++++ 17 files changed, 355 insertions(+), 516 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.cc delete mode 100644 tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h delete mode 100644 tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.cc delete mode 100644 tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h delete mode 100644 tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.cc delete mode 100644 tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h diff --git a/tensorflow/compiler/aot/tfcompile.bzl b/tensorflow/compiler/aot/tfcompile.bzl index eb3e632c7b..9dff1be09f 100644 --- a/tensorflow/compiler/aot/tfcompile.bzl +++ b/tensorflow/compiler/aot/tfcompile.bzl @@ -224,9 +224,6 @@ def tf_library(name, graph, config, # TODO(cwhipkey): only depend on kernel code that the model actually needed. "//tensorflow/compiler/tf2xla/kernels:index_ops_kernel_argmax_float_1d", "//tensorflow/compiler/tf2xla/kernels:index_ops_kernel_argmax_float_2d", - "//tensorflow/compiler/xla/service/cpu:cpu_runtime_avx", - "//tensorflow/compiler/xla/service/cpu:cpu_runtime_neon", - "//tensorflow/compiler/xla/service/cpu:cpu_runtime_sse4_1", "//tensorflow/compiler/xla/service/cpu:runtime_conv2d", "//tensorflow/compiler/xla/service/cpu:runtime_matmul", "//tensorflow/compiler/xla/service/cpu:runtime_single_threaded_conv2d", diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 1a91dd8ff7..c13a0b1cdf 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -159,9 +159,6 @@ cc_library( deps = [ ":compiler_functor", ":cpu_runtime", - ":cpu_runtime_avx", - ":cpu_runtime_neon", - ":cpu_runtime_sse4_1", ":custom_call_target_registry", ":disassembler", ":external_constant_pool", @@ -408,9 +405,6 @@ cc_library( hdrs = ["compiler_functor.h"], deps = [ ":cpu_runtime", - ":cpu_runtime_avx", - ":cpu_runtime_neon", - ":cpu_runtime_sse4_1", ":disassembler", ":llvm_ir_runtime", "//tensorflow/compiler/xla:statusor", @@ -430,43 +424,6 @@ cc_library( ], ) -cc_library( - name = "cpu_runtime_sse4_1", - srcs = ["cpu_runtime_sse4_1.cc"], - hdrs = ["cpu_runtime_sse4_1.h"], - copts = ["-DEIGEN_AVOID_STL_ARRAY"], - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/core:framework_lite", - "//third_party/eigen3", - ], -) - -cc_library( - name = "cpu_runtime_avx", - srcs = ["cpu_runtime_avx.cc"], - hdrs = ["cpu_runtime_avx.h"], - copts = ["-DEIGEN_AVOID_STL_ARRAY"], - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/core:framework_lite", - "//third_party/eigen3", - ], -) - -cc_library( - name = "cpu_runtime_neon", - srcs = ["cpu_runtime_neon.cc"], - hdrs = ["cpu_runtime_neon.h"], - # runtime_copts() enables -mfpu=neon - copts = ["-DEIGEN_AVOID_STL_ARRAY"] + runtime_copts(), - visibility = ["//visibility:public"], - deps = [ - "//tensorflow/core:framework_lite", - "//third_party/eigen3", - ], -) - cc_library( name = "cpu_runtime", srcs = [ diff --git a/tensorflow/compiler/xla/service/cpu/compiler_functor.cc b/tensorflow/compiler/xla/service/cpu/compiler_functor.cc index 2723661712..ed290fcdf8 100644 --- a/tensorflow/compiler/xla/service/cpu/compiler_functor.cc +++ b/tensorflow/compiler/xla/service/cpu/compiler_functor.cc @@ -37,9 +37,6 @@ limitations under the License. #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" -#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h" -#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h" -#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h" #include "tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.h" #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/compiler/xla/statusor.h" @@ -50,15 +47,6 @@ limitations under the License. namespace xla { namespace cpu { -/* static */ CompilerFunctor::VectorIntrinsics -CompilerFunctor::AllIntrinsics() { - VectorIntrinsics intrinsics; - intrinsics.sse_intrinsics = true; - intrinsics.avx_intrinsics = true; - intrinsics.neon_intrinsics = true; - return intrinsics; -} - /* Create filtered versions of the LLVM Pass Managers to filter out some of the expensive passes. Profiling: @@ -192,31 +180,8 @@ operator()(llvm::Module& module) const { std::move(object_file), std::move(memory_buffer)); } -namespace { -// Returns the set of vectorized library functions supported for the target. -std::vector VectorFunctionsForTargetLibraryInfoImpl( - llvm::Triple::ArchType arch, llvm::StringRef feature_string, - CompilerFunctor::VectorIntrinsics const& available_intrinsics) { - std::vector vector_functions; - - const llvm::VecDesc four_wide_vector_functions_neon[] = { - {"logf", runtime::kLogV4F32NEONSymbolName, 4}, - {"llvm.log.f32", runtime::kLogV4F32NEONSymbolName, 4}, - }; - - const llvm::VecDesc four_wide_vector_functions_sse[] = { - {"logf", runtime::kLogV4F32SSESymbolName, 4}, - {"llvm.log.f32", runtime::kLogV4F32SSESymbolName, 4}, - }; - - const llvm::VecDesc eight_wide_vector_functions_avx[] = { - {"logf", runtime::kLogV8F32AVXSymbolName, 8}, - {"llvm.log.f32", runtime::kLogV8F32AVXSymbolName, 8}, - }; - - // These functions are generated by XLA as LLVM IR, so they're always - // available. - const llvm::VecDesc ir_vector_functions[] = { +static std::vector VectorFunctionsForTargetLibraryInfoImpl() { + std::vector result = { {"tanhf", runtime::kTanhV4F32SymbolName, 4}, {"llvm.tanh.f32", runtime::kTanhV4F32SymbolName, 4}, @@ -228,50 +193,15 @@ std::vector VectorFunctionsForTargetLibraryInfoImpl( {"expf", runtime::kExpV8F32SymbolName, 8}, {"llvm.exp.f32", runtime::kExpV8F32SymbolName, 8}, - }; - llvm::SmallVector features; - feature_string.split(features, ',', -1, /*KeepEmpty=*/false); - auto has_feature = [&features](const llvm::StringRef feature) { - return std::find(features.begin(), features.end(), feature) != - features.end(); - }; - - switch (arch) { - case llvm::Triple::x86: - case llvm::Triple::x86_64: { - if (has_feature("+sse4.1") && available_intrinsics.sse_intrinsics) { - vector_functions.insert(vector_functions.end(), - std::begin(four_wide_vector_functions_sse), - std::end(four_wide_vector_functions_sse)); - } - if (has_feature("+avx") && available_intrinsics.avx_intrinsics) { - vector_functions.insert(vector_functions.end(), - std::begin(eight_wide_vector_functions_avx), - std::end(eight_wide_vector_functions_avx)); - } - break; - } - case llvm::Triple::arm: - case llvm::Triple::aarch64: { - if (has_feature("+neon") && available_intrinsics.neon_intrinsics) { - vector_functions.insert(vector_functions.end(), - std::begin(four_wide_vector_functions_neon), - std::end(four_wide_vector_functions_neon)); - } - break; - } - default: - break; - } + {"logf", runtime::kLogV4F32SymbolName, 4}, + {"llvm.log.f32", runtime::kLogV4F32SymbolName, 4}, - vector_functions.insert(vector_functions.end(), - std::begin(ir_vector_functions), - std::end(ir_vector_functions)); - - return vector_functions; + {"logf", runtime::kLogV8F32SymbolName, 8}, + {"llvm.log.f32", runtime::kLogV8F32SymbolName, 8}, + }; + return result; } -} // namespace void CompilerFunctor::AddTargetInfoPasses( llvm::legacy::PassManagerBase* passes) const { @@ -279,9 +209,7 @@ void CompilerFunctor::AddTargetInfoPasses( auto target_library_info_impl = MakeUnique(target_triple); target_library_info_impl->addVectorizableFunctions( - VectorFunctionsForTargetLibraryInfoImpl( - target_triple.getArch(), target_machine_->getTargetFeatureString(), - available_intrinsics_)); + VectorFunctionsForTargetLibraryInfoImpl()); passes->add( new llvm::TargetLibraryInfoWrapperPass(*target_library_info_impl)); passes->add(createTargetTransformInfoWrapperPass( diff --git a/tensorflow/compiler/xla/service/cpu/compiler_functor.h b/tensorflow/compiler/xla/service/cpu/compiler_functor.h index 8cdd049e7b..1a8283a702 100644 --- a/tensorflow/compiler/xla/service/cpu/compiler_functor.h +++ b/tensorflow/compiler/xla/service/cpu/compiler_functor.h @@ -31,21 +31,10 @@ namespace cpu { // Orc JIT compile layer. class CompilerFunctor { public: - // Describes the set of vector intrinsics available to the generated code. - struct VectorIntrinsics { - bool sse_intrinsics; - bool avx_intrinsics; - bool neon_intrinsics; - }; - - // Returns a VectorIntrinsics where all intrinsics are available. - static VectorIntrinsics AllIntrinsics(); - explicit CompilerFunctor( llvm::TargetMachine* target_machine, const Disassembler* disassembler, int opt_level, bool optimize_for_size, bool enable_fast_math, bool disable_expensive_passes, - const VectorIntrinsics& available_intrinsics, LLVMCompiler::ModuleHook pre_optimization_hook = nullptr, LLVMCompiler::ModuleHook post_optimization_hook = nullptr) : target_machine_(target_machine), @@ -54,7 +43,6 @@ class CompilerFunctor { optimize_for_size_(optimize_for_size), enable_fast_math_(enable_fast_math), disable_expensive_passes_(disable_expensive_passes), - available_intrinsics_(available_intrinsics), pre_optimization_hook_(pre_optimization_hook), post_optimization_hook_(post_optimization_hook) {} @@ -78,7 +66,6 @@ class CompilerFunctor { const bool optimize_for_size_; const bool enable_fast_math_; const bool disable_expensive_passes_; - const VectorIntrinsics available_intrinsics_; LLVMCompiler::ModuleHook pre_optimization_hook_; LLVMCompiler::ModuleHook post_optimization_hook_; }; diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index d13a97bcc9..f9cc965184 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -888,8 +888,7 @@ CpuCompiler::CompileAheadOfTime(std::vector> modules, options::OptimizeForSizeRequested(module->config()), module->config().debug_options().xla_enable_fast_math(), module->config().debug_options().xla_llvm_disable_expensive_passes(), - CompilerFunctor::AllIntrinsics(), pre_optimization_ir_dump_hook, - post_optimization_ir_dump_hook); + pre_optimization_ir_dump_hook, post_optimization_ir_dump_hook); llvm::object::OwningBinary object_file = compiler_functor(llvm_module); llvm::StringRef object_file_data_ref = object_file.getBinary()->getData(); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.cc deleted file mode 100644 index 62bb87f2b0..0000000000 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.cc +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h" - -#define EIGEN_USE_THREADS - -#include "third_party/eigen3/Eigen/Core" - -#ifdef TF_XLA_HAS_AVX -xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_LogV8F32AVX( - xla::cpu::runtime::V8F32AVX x) { - return Eigen::internal::plog(x); -} -#endif // TF_XLA_HAS_AVX - -namespace xla { -namespace cpu { -namespace runtime { - -const char *const kLogV8F32AVXSymbolName = "__xla_cpu_runtime_LogV8F32AVX"; - -} // namespace runtime -} // namespace cpu -} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h deleted file mode 100644 index f473c689f2..0000000000 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This header declares functions which may be called by the generated code on -// the CPU. Calls to these functions must be resolved explicitly in the JIT in -// xla::cpu::SimpleResolver. It also defines a per-CpuExecutable context -// which is used to cache expensive state and resources utilized by the -// aforementioned functions. - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_AVX_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_AVX_H_ - -#include "tensorflow/core/platform/macros.h" - -#if defined(__AVX__) -#include -#define TF_XLA_HAS_AVX -#endif - -namespace xla { -namespace cpu { -namespace runtime { - -extern const char *const kLogV8F32AVXSymbolName; - -#ifdef TF_XLA_HAS_AVX -typedef __m256 V8F32AVX; -#endif -} // namespace runtime -} // namespace cpu -} // namespace xla - -extern "C" { - -#ifdef TF_XLA_HAS_AVX -// The following functions are vectorized versions of a selection of libm -// library functions. -// References to these functions are created by the LLVM vectorizer. -xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_ExpV8F32AVX( - xla::cpu::runtime::V8F32AVX x); - -xla::cpu::runtime::V8F32AVX __xla_cpu_runtime_LogV8F32AVX( - xla::cpu::runtime::V8F32AVX x); -#endif -} - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_AVX_H_ diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.cc deleted file mode 100644 index 8099b722f1..0000000000 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h" - -#define EIGEN_USE_THREADS - -#include "third_party/eigen3/Eigen/Core" - -#ifdef TF_XLA_HAS_NEON - -xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_ExpV4F32NEON( - xla::cpu::runtime::V4F32NEON x) { - return Eigen::internal::pexp(x); -} - -xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_LogV4F32NEON( - xla::cpu::runtime::V4F32NEON x) { - Eigen::internal::Packet4f p = x; - return Eigen::internal::plog(p); -} - -#endif // TF_XLA_HAS_NEON - -namespace xla { -namespace cpu { -namespace runtime { - -const char *const kExpV4F32NEONSymbolName = "__xla_cpu_runtime_ExpV4F32NEON"; -const char *const kLogV4F32NEONSymbolName = "__xla_cpu_runtime_LogV4F32NEON"; - -} // namespace runtime -} // namespace cpu -} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h deleted file mode 100644 index 2f5d1a872a..0000000000 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h +++ /dev/null @@ -1,62 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_NEON_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_NEON_H_ - -// This header declares functions which may be called by the generated code on -// the CPU. Calls to these functions must be resolved explicitly in the JIT in -// xla::cpu::SimpleResolver. - -#include "tensorflow/core/platform/macros.h" - -#ifdef __ARM_NEON__ -// For the other runtimes (AVX, SSE4.1) we define the vector type directly using -// __attribute__((__vector_size__(*))). Unfortunately, the typedef for the ARM -// NEON SIMD types is not portable, so the type has to come from -#include -#define TF_XLA_HAS_NEON -#endif // __ARM_NEON__ - -namespace xla { -namespace cpu { -namespace runtime { - -extern const char *const kExpV4F32NEONSymbolName; -extern const char *const kLogV4F32NEONSymbolName; - -#ifdef TF_XLA_HAS_NEON -typedef float32x4_t V4F32NEON; -#endif // TF_XLA_HAS_NEON - -} // namespace runtime -} // namespace cpu -} // namespace xla - -extern "C" { - -#ifdef TF_XLA_HAS_NEON -// The following functions are vectorized versions of a selection of libm -// library functions. -// References to these functions are created by the LLVM vectorizer. -xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_ExpV4F32NEON( - xla::cpu::runtime::V4F32NEON x); - -xla::cpu::runtime::V4F32NEON __xla_cpu_runtime_LogV4F32NEON( - xla::cpu::runtime::V4F32NEON x); -#endif // TF_XLA_HAS_NEON -} - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_NEON_H_ diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.cc deleted file mode 100644 index 1d5b5c2c1e..0000000000 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.cc +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h" - -#define EIGEN_USE_THREADS - -#include "third_party/eigen3/Eigen/Core" - -#ifdef TF_XLA_HAS_SSE4_1 - -xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_LogV4F32SSE( - xla::cpu::runtime::V4F32SSE x) { - Eigen::internal::Packet4f p = x; - return Eigen::internal::plog(p); -} - -#endif // TF_XLA_HAS_SSE4_1 - -namespace xla { -namespace cpu { -namespace runtime { - -const char *const kLogV4F32SSESymbolName = "__xla_cpu_runtime_LogV4F32SSE"; - -} // namespace runtime -} // namespace cpu -} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h deleted file mode 100644 index 3b3d18112a..0000000000 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This header declares functions which may be called by the generated code on -// the CPU. Calls to these functions must be resolved explicitly in the JIT in -// xla::cpu::SimpleResolver. It also defines a per-CpuExecutable context -// which is used to cache expensive state and resources utilized by the -// aforementioned functions. - -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_SSE4_1_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_SSE4_1_H_ - -#include "tensorflow/core/platform/macros.h" - -// MSVC does not have __SSE4_1__ macro. Eigen enables EIGEN_VECTORIZE_SSE4_1 -// when __AVX__ is defined, we should do the same. -#if defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX__)) -#include -#define TF_XLA_HAS_SSE4_1 -#endif - -namespace xla { -namespace cpu { -namespace runtime { - -extern const char *const kLogV4F32SSESymbolName; - -#ifdef TF_XLA_HAS_SSE4_1 -typedef __m128 V4F32SSE; -#endif - -} // namespace runtime -} // namespace cpu -} // namespace xla - -extern "C" { - -#ifdef TF_XLA_HAS_SSE4_1 -// The following functions are vectorized versions of a selection of libm -// library functions. -// References to these functions are created by the LLVM vectorizer. -xla::cpu::runtime::V4F32SSE __xla_cpu_runtime_LogV4F32SSE( - xla::cpu::runtime::V4F32SSE x); -#endif -} - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_SSE4_1_H_ diff --git a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc index 38fcd278e9..ee213e05d1 100644 --- a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc @@ -21,6 +21,7 @@ limitations under the License. #include "llvm/IR/Verifier.h" #include "llvm/Transforms/Utils/Cloning.h" #include "tensorflow/compiler/xla/service/cpu/vector_support_library.h" +#include "tensorflow/core/lib/core/casts.h" #include "tensorflow/core/platform/logging.h" namespace xla { @@ -31,6 +32,8 @@ const char* const kTanhV4F32SymbolName = "__xla_cpu_runtime_TanhV4F32"; const char* const kTanhV8F32SymbolName = "__xla_cpu_runtime_TanhV8F32"; const char* const kExpV4F32SymbolName = "__xla_cpu_runtime_ExpV4F32"; const char* const kExpV8F32SymbolName = "__xla_cpu_runtime_ExpV8F32"; +const char* const kLogV4F32SymbolName = "__xla_cpu_runtime_LogV4F32AVX"; +const char* const kLogV8F32SymbolName = "__xla_cpu_runtime_LogV8F32AVX"; namespace { llvm::Function* EmitVectorF32TanhIfNeeded(llvm::Module* module, @@ -116,19 +119,19 @@ llvm::Function* EmitVectorF32ExpIfNeeded(llvm::Module* module, // This implements the same polynomial approximation as implemented in Eigen3. - const double exp_hi = 88.3762626647950; - const double exp_lo = -88.3762626647949; + const float exp_hi = 88.3762626647950; + const float exp_lo = -88.3762626647949; - const double cephes_LOG2EF = 1.44269504088896341; - const double cephes_exp_C1 = 0.693359375; - const double cephes_exp_C2 = -2.12194440e-4; + const float cephes_LOG2EF = 1.44269504088896341; + const float cephes_exp_C1 = 0.693359375; + const float cephes_exp_C2 = -2.12194440e-4; - const double cephes_exp_p0 = 1.9875691500E-4; - const double cephes_exp_p1 = 1.3981999507E-3; - const double cephes_exp_p2 = 8.3334519073E-3; - const double cephes_exp_p3 = 4.1665795894E-2; - const double cephes_exp_p4 = 1.6666665459E-1; - const double cephes_exp_p5 = 5.0000001201E-1; + const float cephes_exp_p0 = 1.9875691500E-4; + const float cephes_exp_p1 = 1.3981999507E-3; + const float cephes_exp_p2 = 8.3334519073E-3; + const float cephes_exp_p3 = 4.1665795894E-2; + const float cephes_exp_p4 = 1.6666665459E-1; + const float cephes_exp_p5 = 5.0000001201E-1; llvm::Value* input = &*vector_exp_function->arg_begin(); llvm::Value* input_clamped = @@ -146,7 +149,7 @@ llvm::Function* EmitVectorF32ExpIfNeeded(llvm::Module* module, y = vsl.MulAdd(y, x, cephes_exp_p4); y = vsl.MulAdd(y, x, cephes_exp_p5); y = vsl.MulAdd(y, z, x); - y = vsl.Add(1.0, y); + y = vsl.Add(1.0f, y); // VectorSupportLibrary (intentionally) can't juggle more than one type at a // time so drop down to IRBuilder for this bit. @@ -167,9 +170,133 @@ llvm::Function* EmitVectorF32ExpIfNeeded(llvm::Module* module, ir_builder.CreateRet(result); - CHECK(!llvm::verifyFunction(*vector_exp_function)); + DCHECK(!llvm::verifyFunction(*vector_exp_function)); return vector_exp_function; } + +llvm::Function* EmitVectorF32LogIfNeeded(llvm::Module* module, + llvm::StringRef function_name, + int vector_width, + bool enable_fast_math) { + llvm::Function* vector_log_function = module->getFunction(function_name); + if (vector_log_function == nullptr) { + // If the function declaration is not present in the module, there can't be + // any calls to resolve. Don't emit the function in this case. + return nullptr; + } + + llvm::LLVMContext* context = &module->getContext(); + + llvm::BasicBlock* vector_log_body = + llvm::BasicBlock::Create(*context, "body", vector_log_function); + + llvm::IRBuilder<> ir_builder(vector_log_body); + llvm::FastMathFlags fast_math_flags; + fast_math_flags.setFast(); + ir_builder.setFastMathFlags(fast_math_flags); + + llvm::Value* input = &*vector_log_function->arg_begin(); + VectorSupportLibrary vsl(F32, vector_width, &ir_builder, "log_f32"); + + const float half = 0.5; + + // This implements the same polynomial approximation as implemented in Eigen3. + // Returns NaN for x < 0, -INF for x = 0 + const float cephes_SQRTHF = 0.707106781186547524; + const float cephes_log_p0 = 7.0376836292E-2; + const float cephes_log_p1 = -1.1514610310E-1; + const float cephes_log_p2 = 1.1676998740E-1; + const float cephes_log_p3 = -1.2420140846E-1; + const float cephes_log_p4 = +1.4249322787E-1; + const float cephes_log_p5 = -1.6668057665E-1; + const float cephes_log_p6 = +2.0000714765E-1; + const float cephes_log_p7 = -2.4999993993E-1; + const float cephes_log_p8 = +3.3333331174E-1; + const float cephes_log_q1 = -2.12194440e-4; + const float cephes_log_q2 = 0.693359375; + + // The smallest non denormalized float number. + const float min_norm_pos = tensorflow::bit_cast(0x00800000); + const float minus_inf = tensorflow::bit_cast(0xff800000); + + // NB! This number is denormal and since TF sets the denormals-are-zero flag + // (and if TF didn't, -ffast-math would) trying to operate on this float using + // C++ operations (including, for instance, implicit conversion to double) + // will coerce this to zero. + const float inv_mant_mask = tensorflow::bit_cast(~0x7f800000); + + // invalid_mask is set if x is negative or NaN (and therefore output + // must be NaN). + llvm::Value* invalid_mask = vsl.FCmpULEMask(input, vsl.GetZeroVector()); + llvm::Value* iszero_mask = vsl.FCmpEQMask(input, vsl.GetZeroVector()); + + // Cut off denormalized stuff. + input = vsl.Max(min_norm_pos, input); + + // VectorSupportLibrary (intentionally) can't juggle more than one type at a + // time so drop down to IRBuilder for this bit. + llvm::Value* vector_constant_0x7f = + ir_builder.CreateVectorSplat(vector_width, ir_builder.getInt32(0x7f)); + llvm::Value* vector_constant_23 = + ir_builder.CreateVectorSplat(vector_width, ir_builder.getInt32(23)); + llvm::Type* i32_vector_type = + llvm::VectorType::get(ir_builder.getInt32Ty(), vector_width); + + llvm::Value* emm0 = ir_builder.CreateLShr( + ir_builder.CreateBitCast(input, i32_vector_type), vector_constant_23); + + // Keep only the fractional part. + input = vsl.FloatAnd(input, inv_mant_mask); + input = vsl.FloatOr(input, half); + + emm0 = ir_builder.CreateSub(emm0, vector_constant_0x7f); + llvm::Value* e = + vsl.Add(1.0f, ir_builder.CreateSIToFP(emm0, vsl.vector_type())); + + // part2: + // if( x < SQRTHF ) { + // e -= 1; + // x = x + x - 1.0; + // } else { x = x - 1.0; } + llvm::Value* mask = vsl.FCmpOLTMask(input, cephes_SQRTHF); + llvm::Value* tmp = vsl.FloatAnd(input, mask); + input = vsl.Sub(input, 1.0); + e = vsl.Sub(e, vsl.FloatAnd(mask, 1.0)); + input = vsl.Add(input, tmp); + + llvm::Value* x2 = vsl.Mul(input, input); + llvm::Value* x3 = vsl.Mul(x2, input); + + llvm::Value *y, *y1, *y2; + y = vsl.MulAdd(input, cephes_log_p0, cephes_log_p1); + y1 = vsl.MulAdd(input, cephes_log_p3, cephes_log_p4); + y2 = vsl.MulAdd(input, cephes_log_p6, cephes_log_p7); + y = vsl.MulAdd(y, input, cephes_log_p2); + y1 = vsl.MulAdd(y1, input, cephes_log_p5); + y2 = vsl.MulAdd(y2, input, cephes_log_p8); + y = vsl.MulAdd(y, x3, y1); + y = vsl.MulAdd(y, x3, y2); + y = vsl.Mul(y, x3); + + y1 = vsl.Mul(cephes_log_q1, e); + tmp = vsl.Mul(half, x2); + y = vsl.Add(y, y1); + input = vsl.Sub(input, tmp); + y2 = vsl.Mul(cephes_log_q2, e); + input = vsl.Add(input, y); + input = vsl.Add(input, y2); + + // Negative arg will be NAN, 0 will be -INF. + llvm::Value* or_lhs = + vsl.FloatAndNot(iszero_mask, vsl.FloatOr(input, invalid_mask)); + llvm::Value* or_rhs = vsl.FloatAnd(iszero_mask, minus_inf); + llvm::Value* result = vsl.FloatOr(or_lhs, or_rhs); + + ir_builder.CreateRet(result); + + DCHECK(!llvm::verifyFunction(*vector_log_function)); + return vector_log_function; +} } // namespace void RewriteIRRuntimeFunctions(llvm::Module* module, bool enable_fast_math) { @@ -187,11 +314,21 @@ void RewriteIRRuntimeFunctions(llvm::Module* module, bool enable_fast_math) { EmitVectorF32ExpIfNeeded(module, kExpV8F32SymbolName, /*vector_width=*/8, enable_fast_math); + auto* log_v4f32 = + EmitVectorF32LogIfNeeded(module, kLogV4F32SymbolName, + /*vector_width=*/4, enable_fast_math); + auto* log_v8f32 = + EmitVectorF32LogIfNeeded(module, kLogV8F32SymbolName, + /*vector_width=*/8, enable_fast_math); + // Gather all the call sites, force inline them and then delete the vector // function bodies. + // + // TODO(b/73081976): Should we avoid inlining these intrinsics in some cases? std::vector calls_to_inline; - for (auto* function : {tanh_v4f32, tanh_v8f32, exp_v4f32, exp_v8f32}) { + for (auto* function : + {tanh_v4f32, tanh_v8f32, exp_v4f32, exp_v8f32, log_v4f32, log_v8f32}) { if (function != nullptr) { for (auto* user : function->users()) { calls_to_inline.push_back(llvm::cast(user)); @@ -204,7 +341,8 @@ void RewriteIRRuntimeFunctions(llvm::Module* module, bool enable_fast_math) { CHECK(llvm::InlineFunction(call_to_inline, inline_function_info)); } - for (auto* function : {tanh_v4f32, tanh_v8f32, exp_v4f32, exp_v8f32}) { + for (auto* function : + {tanh_v4f32, tanh_v8f32, exp_v4f32, exp_v8f32, log_v4f32, log_v8f32}) { if (function != nullptr) { function->eraseFromParent(); } diff --git a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.h b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.h index 90050c4459..5553972677 100644 --- a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.h +++ b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.h @@ -27,6 +27,8 @@ extern const char* const kTanhV4F32SymbolName; extern const char* const kTanhV8F32SymbolName; extern const char* const kExpV4F32SymbolName; extern const char* const kExpV8F32SymbolName; +extern const char* const kLogV4F32SymbolName; +extern const char* const kLogV8F32SymbolName; // The following CPU runtime functions have LLVM-IR only implementations: // diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index 2f4468cca7..64d3a51f41 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -28,9 +28,6 @@ limitations under the License. #include "llvm/Support/Host.h" #include "tensorflow/compiler/xla/ptr_util.h" #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h" -#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_avx.h" -#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_neon.h" -#include "tensorflow/compiler/xla/service/cpu/cpu_runtime_sse4_1.h" #include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h" #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d.h" @@ -101,27 +98,6 @@ llvm::StringRef GetHostCpuName() { cpu_name.consume_back("-avx512"); return cpu_name; } - -CompilerFunctor::VectorIntrinsics GetAvailableIntrinsics() { - CompilerFunctor::VectorIntrinsics intrinsics; -#ifdef TF_XLA_HAS_SSE4_1 - intrinsics.sse_intrinsics = true; -#else - intrinsics.sse_intrinsics = false; -#endif -#ifdef TF_XLA_HAS_AVX - intrinsics.avx_intrinsics = true; -#else - intrinsics.avx_intrinsics = false; -#endif -#ifdef TF_XLA_HAS_NEON - intrinsics.neon_intrinsics = true; -#else - intrinsics.neon_intrinsics = false; -#endif - return intrinsics; -} - } // namespace SimpleOrcJIT::SimpleOrcJIT(const llvm::TargetOptions& target_options, @@ -169,13 +145,12 @@ SimpleOrcJIT::SimpleOrcJIT(const llvm::TargetOptions& target_options, orc_jit_memory_mapper::GetInstance()); }, [this](llvm::orc::VModuleKey K) { return symbol_resolver_; }), - compile_layer_( - object_layer_, - CompilerFunctor(target_machine_.get(), &disassembler_, opt_level, - optimize_for_size, enable_fast_math, - disable_expensive_passes, GetAvailableIntrinsics(), - std::move(pre_optimization_hook), - std::move(post_optimization_hook))) { + compile_layer_(object_layer_, + CompilerFunctor(target_machine_.get(), &disassembler_, + opt_level, optimize_for_size, + enable_fast_math, disable_expensive_passes, + std::move(pre_optimization_hook), + std::move(post_optimization_hook))) { VLOG(1) << "CPU target: " << target_machine_->getTargetCPU().str() << " features: " << target_machine_->getTargetFeatureString().str(); } @@ -240,15 +215,6 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); -#ifdef TF_XLA_HAS_NEON - REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32NEON); -#endif -#ifdef TF_XLA_HAS_SSE4_1 - REGISTER_CPU_RUNTIME_SYMBOL(LogV4F32SSE); -#endif -#ifdef TF_XLA_HAS_AVX - REGISTER_CPU_RUNTIME_SYMBOL(LogV8F32AVX); -#endif REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin); REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue); REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation); diff --git a/tensorflow/compiler/xla/service/cpu/vector_support_library.cc b/tensorflow/compiler/xla/service/cpu/vector_support_library.cc index ec4215b468..0596e80df4 100644 --- a/tensorflow/compiler/xla/service/cpu/vector_support_library.cc +++ b/tensorflow/compiler/xla/service/cpu/vector_support_library.cc @@ -103,15 +103,92 @@ llvm::Value* VectorSupportLibrary::Div(llvm::Value* lhs, llvm::Value* rhs) { } } -llvm::Value* VectorSupportLibrary::Clamp(llvm::Value* a, double low, - double high) { +llvm::Value* VectorSupportLibrary::Clamp(llvm::Value* a, float low, + float high) { AssertCorrectTypes({a}); llvm::Type* type = a->getType(); CHECK_LT(low, high); CHECK(scalar_type_->isFloatingPointTy()); return llvm_ir::EmitFloatMin( - llvm_ir::EmitFloatMax(a, llvm::ConstantFP::get(type, low), ir_builder_), - llvm::ConstantFP::get(type, high), ir_builder_); + llvm_ir::EmitFloatMax(a, GetConstantFloat(type, low), ir_builder_), + GetConstantFloat(type, high), ir_builder_); +} + +llvm::Value* VectorSupportLibrary::FCmpEQMask(llvm::Value* lhs, + llvm::Value* rhs) { + AssertCorrectTypes({lhs, rhs}); + return I1ToFloat(ir_builder()->CreateFCmpOEQ(lhs, rhs, name())); +} + +llvm::Value* VectorSupportLibrary::FCmpOLTMask(llvm::Value* lhs, + llvm::Value* rhs) { + AssertCorrectTypes({lhs, rhs}); + return I1ToFloat(ir_builder()->CreateFCmpOLT(lhs, rhs, name())); +} + +llvm::Value* VectorSupportLibrary::FCmpULEMask(llvm::Value* lhs, + llvm::Value* rhs) { + AssertCorrectTypes({lhs, rhs}); + return I1ToFloat(ir_builder()->CreateFCmpULE(lhs, rhs, name())); +} + +llvm::Value* VectorSupportLibrary::I1ToFloat(llvm::Value* i1) { + bool is_vector = llvm::isa(i1->getType()); + llvm::Type* integer_type = IntegerTypeForFloatSize(is_vector); + return ir_builder()->CreateBitCast( + ir_builder()->CreateSExt(i1, integer_type, name()), + is_vector ? vector_type() : scalar_type(), name()); +} + +llvm::Type* VectorSupportLibrary::IntegerTypeForFloatSize(bool vector) { + CHECK(scalar_type()->isFloatingPointTy()); + const llvm::DataLayout& data_layout = + ir_builder()->GetInsertBlock()->getModule()->getDataLayout(); + int64 float_size_bits = data_layout.getTypeSizeInBits(scalar_type()); + llvm::Type* scalar_int_type = ir_builder()->getIntNTy(float_size_bits); + if (vector) { + return llvm::VectorType::get(scalar_int_type, vector_size()); + } else { + return scalar_int_type; + } +} + +llvm::Value* VectorSupportLibrary::BroadcastScalar(llvm::Value* x) { + CHECK_EQ(x->getType(), scalar_type()); + return ir_builder()->CreateVectorSplat(vector_size(), x, name()); +} + +llvm::Value* VectorSupportLibrary::FloatAnd(llvm::Value* lhs, + llvm::Value* rhs) { + AssertCorrectTypes({lhs, rhs}); + llvm::Type* int_type = + IntegerTypeForFloatSize(lhs->getType() == vector_type()); + return ir_builder()->CreateBitCast( + ir_builder()->CreateAnd( + ir_builder()->CreateBitCast(lhs, int_type, name()), + ir_builder()->CreateBitCast(rhs, int_type, name()), name()), + vector_type()); +} + +llvm::Value* VectorSupportLibrary::FloatNot(llvm::Value* lhs) { + AssertCorrectTypes({lhs}); + llvm::Type* int_type = + IntegerTypeForFloatSize(lhs->getType() == vector_type()); + return ir_builder()->CreateBitCast( + ir_builder()->CreateNot( + ir_builder()->CreateBitCast(lhs, int_type, name()), name()), + vector_type()); +} + +llvm::Value* VectorSupportLibrary::FloatOr(llvm::Value* lhs, llvm::Value* rhs) { + AssertCorrectTypes({lhs, rhs}); + llvm::Type* int_type = + IntegerTypeForFloatSize(lhs->getType() == vector_type()); + return ir_builder()->CreateBitCast( + ir_builder()->CreateOr(ir_builder()->CreateBitCast(lhs, int_type, name()), + ir_builder()->CreateBitCast(rhs, int_type, name()), + name()), + vector_type(), name()); } llvm::Value* VectorSupportLibrary::AddInternal(llvm::Value* lhs, diff --git a/tensorflow/compiler/xla/service/cpu/vector_support_library.h b/tensorflow/compiler/xla/service/cpu/vector_support_library.h index 5c5d703db5..010c82f0cf 100644 --- a/tensorflow/compiler/xla/service/cpu/vector_support_library.h +++ b/tensorflow/compiler/xla/service/cpu/vector_support_library.h @@ -41,40 +41,82 @@ class VectorSupportLibrary { llvm::Value* Mul(int64 lhs, llvm::Value* rhs) { return Mul(ir_builder()->getInt64(lhs), rhs); } - llvm::Value* Mul(double lhs, llvm::Value* rhs) { - return Mul(llvm::ConstantFP::get(rhs->getType(), lhs), rhs); + llvm::Value* Mul(float lhs, llvm::Value* rhs) { + return Mul(GetConstantFloat(rhs->getType(), lhs), rhs); } llvm::Value* Add(llvm::Value* lhs, llvm::Value* rhs); llvm::Value* Add(int64 lhs, llvm::Value* rhs) { return Add(ir_builder()->getInt64(lhs), rhs); } - llvm::Value* Add(double lhs, llvm::Value* rhs) { - return Add(llvm::ConstantFP::get(vector_type(), lhs), rhs); + llvm::Value* Add(float lhs, llvm::Value* rhs) { + return Add(GetConstantFloat(rhs->getType(), lhs), rhs); } llvm::Value* Sub(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* Sub(llvm::Value* lhs, float rhs) { + return Sub(lhs, GetConstantFloat(lhs->getType(), rhs)); + } llvm::Value* Max(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* Max(float lhs, llvm::Value* rhs) { + return Max(GetConstantFloat(rhs->getType(), lhs), rhs); + } llvm::Value* Div(llvm::Value* lhs, llvm::Value* rhs); llvm::Value* MulAdd(llvm::Value* a, llvm::Value* b, llvm::Value* c) { return Add(c, Mul(a, b)); } - llvm::Value* MulAdd(llvm::Value* a, llvm::Value* b, double c) { - return Add(llvm::ConstantFP::get(vector_type(), c), Mul(a, b)); + llvm::Value* MulAdd(llvm::Value* a, llvm::Value* b, float c) { + return Add(GetConstantFloat(vector_type(), c), Mul(a, b)); } - llvm::Value* MulAdd(llvm::Value* a, double b, double c) { - return Add(llvm::ConstantFP::get(a->getType(), c), - Mul(a, llvm::ConstantFP::get(a->getType(), b))); + llvm::Value* MulAdd(llvm::Value* a, float b, float c) { + return Add(GetConstantFloat(a->getType(), c), + Mul(a, GetConstantFloat(a->getType(), b))); } llvm::Value* Floor(llvm::Value* a); - llvm::Value* Clamp(llvm::Value* a, double low, double high); - llvm::Value* SplatFloat(double d) { - return llvm::ConstantFP::get(vector_type(), d); + llvm::Value* Clamp(llvm::Value* a, float low, float high); + llvm::Value* SplatFloat(float d) { + return GetConstantFloat(vector_type(), d); + } + + // These compare instructions return a floating point typed mask instead of an + // i1. For instance, on a vector typed input, lanes where the predicate is + // true get a float with all ones and other lanes get a float with all zeros. + // This is slightly odd from the perspective of LLVM's type system, but it + // makes kernel IR generation code written using VectorSupportLibrary (its + // raison d'etre) less cluttered. + + llvm::Value* FCmpEQMask(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* FCmpULEMask(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* FCmpOLTMask(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* FCmpOLTMask(llvm::Value* lhs, float rhs) { + return FCmpOLTMask(lhs, GetConstantFloat(lhs->getType(), rhs)); + } + + // These boolean operations operate on the bitwise values of the floating + // point inputs. They return a (vector of) float(s) but like in the mask + // generating predicates above this type system oddity makes the kernel IR + // generation code less cluttered. + llvm::Value* FloatAnd(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* FloatAnd(llvm::Value* lhs, float rhs) { + return FloatAnd(lhs, GetConstantFloat(lhs->getType(), rhs)); + } + llvm::Value* FloatOr(llvm::Value* lhs, llvm::Value* rhs); + llvm::Value* FloatOr(llvm::Value* lhs, float rhs) { + return FloatOr(lhs, GetConstantFloat(lhs->getType(), rhs)); + } + llvm::Value* FloatNot(llvm::Value* lhs); + llvm::Value* FloatAndNot(llvm::Value* lhs, llvm::Value* rhs) { + return FloatAnd(FloatNot(lhs), rhs); + } + + llvm::Value* BroadcastScalar(llvm::Value* x); + llvm::Value* BroadcastScalar(float d) { + return BroadcastScalar(GetConstantFloat(scalar_type(), d)); } llvm::Value* ComputeOffsetPointer(llvm::Value* base_pointer, @@ -194,6 +236,17 @@ class VectorSupportLibrary { std::vector ComputeAvxOptimizedHorizontalSums( std::vector vectors, llvm::Value* init_values); + llvm::Type* IntegerTypeForFloatSize(bool vector); + llvm::Value* I1ToFloat(llvm::Value* i1); + llvm::Value* GetConstantFloat(llvm::Type* type, float f) { + llvm::Constant* scalar_value = + llvm::ConstantFP::get(type->getContext(), llvm::APFloat(f)); + if (llvm::isa(type)) { + return llvm::ConstantVector::getSplat(vector_size(), scalar_value); + } + return scalar_value; + } + int64 vector_size_; PrimitiveType primitive_type_; llvm::IRBuilder<>* ir_builder_; diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 87ac7731ba..7e9005001d 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -2121,6 +2121,44 @@ XLA_TEST_F(ArrayElementwiseOpTest, ExpF32sVector) { error_spec_); } +XLA_TEST_F(ArrayElementwiseOpTest, LogF32sVector) { + // The input tensor is large enough to exercise the vectorized exp + // implementation on XLA CPU. + ComputationBuilder builder(client_, TestName()); + + std::unique_ptr input_literal = Literal::CreateR1( + {-1.29, -1.41, -1.25, -13.5, -11.7, -17.9, -198, + -167, 1.29, 1.41, 1.25, 13.5, 11.7, 17.9, + 198, 167, 1.27e+03, 1.33e+03, 1.74e+03, 1.6e+04, 1.84e+04, + 1.74e+04, 1.89e+05, 1.9e+05, 1.93e+06, 1.98e+06, 1.65e+06, 1.97e+07, + 1.66e+07, 1e+07, 1.98e+08, 1.96e+08, 1.64e+09, 1.58e+09, 1.64e+09, + 1.44e+10, 1.5e+10, 1.99e+10, 1.17e+11, 1.08e+11, 1.08e+12, 1.38e+12, + 1.4e+12, 1.03e+13, 1.6e+13, 1.99e+13, 1.26e+14, 1.51e+14, 1.33e+15, + 1.41e+15, 1.63e+15, 1.39e+16, 1.21e+16, 1.27e+16, 1.28e+17, 1.62e+17, + 2e+18, 1.96e+18, 1.81e+18, 1.99e+19, 1.86e+19, 1.61e+19, 1.71e+20, + 1.47e+20, 1.83e+21, 1.33e+21, 1.3e+21, 1.35e+22, 1.84e+22, 1.02e+22, + 1.81e+23, 1.02e+23, 1.89e+24, 1.49e+24, 1.08e+24, 1.95e+25, 1.1e+25, + 1.62e+25, 1.2e+26, 1.41e+26, 1.93e+27, 1.66e+27, 1.62e+27, 1.05e+28, + 1.5e+28, 1.79e+28, 1.36e+29, 1.95e+29, 1.5e+30, 1.81e+30, 1.34e+30, + 1.7e+31, 1.44e+31, 1.1e+31, 1.4e+32, 1.67e+32, 1.96e+33, 1.11e+33, + 1.19e+33, 1.61e+34, 1.05e+34, 1.88e+34, 1.67e+35, 1.7e+35}); + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr input_data, + client_->TransferToServer(*input_literal)); + + auto input = builder.Parameter(0, input_literal->shape(), "input"); + builder.Log(input); + + std::vector expected_result; + int64 input_size = input_literal->shape().dimensions(0); + expected_result.reserve(input_size); + for (int64 i = 0; i < input_size; i++) { + expected_result.push_back(std::log(input_literal->Get({i}))); + } + + ComputeAndCompareR1(&builder, expected_result, {input_data.get()}, + error_spec_); +} + XLA_TEST_F(ArrayElementwiseOpTest, AddChainFoldLeft) { // a ------ (add) --------- (add) // / / -- GitLab From eec85cb5e4eac9764a34debccac0e70d37519b3d Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Mon, 12 Feb 2018 22:45:49 -0800 Subject: [PATCH 107/629] [TF:XLA] Work around crash in Gather op on CPU backend by making loop bound a compile-time constant. PiperOrigin-RevId: 185486148 --- .../compiler/tf2xla/kernels/gather_op.cc | 6 +++--- tensorflow/compiler/tf2xla/lib/BUILD | 1 + tensorflow/compiler/tf2xla/lib/scatter.cc | 7 +++---- tensorflow/compiler/tf2xla/lib/while_loop.cc | 20 ++++++++----------- tensorflow/compiler/tf2xla/lib/while_loop.h | 2 +- 5 files changed, 16 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/gather_op.cc b/tensorflow/compiler/tf2xla/kernels/gather_op.cc index 24f7bebdad..7945c05af4 100644 --- a/tensorflow/compiler/tf2xla/kernels/gather_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/gather_op.cc @@ -153,9 +153,9 @@ Status XlaGather(const xla::ComputationDataHandle& input, }; // Construct the While loop, extract and reshape the output. - auto num_indices_value = - XlaHelpers::IntegerLiteral(builder, index_type, num_indices); - TF_ASSIGN_OR_RETURN(auto outputs, XlaForEachIndex(num_indices_value, body_fn, + xla::PrimitiveType ptype; + TF_RETURN_IF_ERROR(DataTypeToPrimitiveType(index_type, &ptype)); + TF_ASSIGN_OR_RETURN(auto outputs, XlaForEachIndex(num_indices, ptype, body_fn, init, "gather", builder)); *gather_output = builder->Reshape(outputs[2], out_shape.dim_sizes()); return Status::OK(); diff --git a/tensorflow/compiler/tf2xla/lib/BUILD b/tensorflow/compiler/tf2xla/lib/BUILD index da2fdd1d34..488fda74bf 100644 --- a/tensorflow/compiler/tf2xla/lib/BUILD +++ b/tensorflow/compiler/tf2xla/lib/BUILD @@ -131,6 +131,7 @@ cc_library( srcs = ["while_loop.cc"], hdrs = ["while_loop.h"], deps = [ + ":util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/tf2xla/lib/scatter.cc b/tensorflow/compiler/tf2xla/lib/scatter.cc index e8026ecc8d..6009243f97 100644 --- a/tensorflow/compiler/tf2xla/lib/scatter.cc +++ b/tensorflow/compiler/tf2xla/lib/scatter.cc @@ -180,10 +180,9 @@ xla::StatusOr XlaScatter( return std::vector{indices, updates, buffer}; }; - xla::ComputationDataHandle num_indices_value = - IntegerLiteral(builder, indices_shape->element_type(), num_indices); - TF_ASSIGN_OR_RETURN(auto outputs, XlaForEachIndex(num_indices_value, body_fn, - init, "scatter", builder)); + TF_ASSIGN_OR_RETURN( + auto outputs, XlaForEachIndex(num_indices, indices_shape->element_type(), + body_fn, init, "scatter", builder)); return outputs[2]; } diff --git a/tensorflow/compiler/tf2xla/lib/while_loop.cc b/tensorflow/compiler/tf2xla/lib/while_loop.cc index d35f6cd13f..86c02ac2e6 100644 --- a/tensorflow/compiler/tf2xla/lib/while_loop.cc +++ b/tensorflow/compiler/tf2xla/lib/while_loop.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/tf2xla/lib/while_loop.h" +#include "tensorflow/compiler/tf2xla/lib/util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -79,19 +80,16 @@ xla::StatusOr> XlaWhileLoop( } xla::StatusOr> XlaForEachIndex( - const xla::ComputationDataHandle& num_iterations, + int64 num_iterations, xla::PrimitiveType num_iterations_type, const ForEachIndexBodyFunction& body_function, gtl::ArraySlice initial_values, StringPiece name, xla::ComputationBuilder* builder) { - TF_ASSIGN_OR_RETURN(std::unique_ptr num_iterations_shape, - builder->GetShape(num_iterations)); - TF_RET_CHECK(xla::ShapeUtil::IsScalar(*num_iterations_shape)); - xla::PrimitiveType num_iterations_type = num_iterations_shape->element_type(); - auto while_cond_fn = [&](gtl::ArraySlice values, xla::ComputationBuilder* cond_builder) -> xla::StatusOr { - return cond_builder->Lt(values[0], values[1]); + return cond_builder->Lt( + values[0], + IntegerLiteral(cond_builder, num_iterations_type, num_iterations)); }; auto while_body_fn = [&](gtl::ArraySlice values, xla::ComputationBuilder* body_builder) @@ -103,9 +101,8 @@ xla::StatusOr> XlaForEachIndex( updated_values.push_back(body_builder->Add( iteration, body_builder->ConstantLiteral(xla::Literal::One(num_iterations_type)))); - updated_values.push_back(values[1]); - values.remove_prefix(2); + values.remove_prefix(1); TF_ASSIGN_OR_RETURN(std::vector body_outputs, body_function(iteration, values, body_builder)); updated_values.insert(updated_values.end(), body_outputs.begin(), @@ -114,15 +111,14 @@ xla::StatusOr> XlaForEachIndex( }; std::vector values; - values.reserve(initial_values.size() + 2); + values.reserve(initial_values.size() + 1); values.push_back( builder->ConstantLiteral(xla::Literal::Zero(num_iterations_type))); - values.push_back(num_iterations); values.insert(values.end(), initial_values.begin(), initial_values.end()); TF_ASSIGN_OR_RETURN(values, XlaWhileLoop(while_cond_fn, while_body_fn, values, name, builder)); - values.erase(values.begin(), values.begin() + 2); + values.erase(values.begin(), values.begin() + 1); return values; } diff --git a/tensorflow/compiler/tf2xla/lib/while_loop.h b/tensorflow/compiler/tf2xla/lib/while_loop.h index 562a589fbf..2e67a0c99b 100644 --- a/tensorflow/compiler/tf2xla/lib/while_loop.h +++ b/tensorflow/compiler/tf2xla/lib/while_loop.h @@ -64,7 +64,7 @@ typedef std::function>( ForEachIndexBodyFunction; xla::StatusOr> XlaForEachIndex( - const xla::ComputationDataHandle& num_iterations, + int64 num_iterations, xla::PrimitiveType num_iterations_type, const ForEachIndexBodyFunction& body_function, gtl::ArraySlice initial_values, StringPiece name, xla::ComputationBuilder* builder); -- GitLab From d9976e40c03eab6ed7a767ff7531d0b1aa870739 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Feb 2018 22:54:48 -0800 Subject: [PATCH 108/629] 1. Add image_ops.is_jpeg Op to decide if a input string is a jpeg string or not. 2. Change tfexample_decoder in slim/objection_detection to accept different JPEG decompression method. Defaults to ""/None which maps to a system-specific default. Currently valid values are ["INTEGER_FAST", "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal jpeg library changes to a version that does not have that specific option.) PiperOrigin-RevId: 185486653 --- .../python/slim/data/tfexample_decoder.py | 31 ++++++++++++++++--- tensorflow/python/ops/image_ops.py | 1 + tensorflow/python/ops/image_ops_impl.py | 24 ++++++++++++-- .../tools/api/golden/tensorflow.image.pbtxt | 4 +++ 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py index 0544404e9e..b3b61e1dfe 100644 --- a/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py +++ b/tensorflow/contrib/slim/python/slim/data/tfexample_decoder.py @@ -349,7 +349,8 @@ class Image(ItemHandler): shape=None, channels=3, dtype=dtypes.uint8, - repeated=False): + repeated=False, + dct_method=''): """Initializes the image. Args: @@ -368,6 +369,11 @@ class Image(ItemHandler): tf.decode_raw, repeated: if False, decodes a single image. If True, decodes a variable number of image strings from a 1D tensor of strings. + dct_method: An optional string. Defaults to empty string. It only takes + effect when image format is jpeg, used to specify a hint about the + algorithm used for jpeg decompression. Currently valid values + are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for + example, the jpeg library does not have that specific option. """ if not image_key: image_key = 'image/encoded' @@ -381,6 +387,7 @@ class Image(ItemHandler): self._channels = channels self._dtype = dtype self._repeated = repeated + self._dct_method = dct_method def tensors_to_item(self, keys_to_tensors): """See base class.""" @@ -406,9 +413,25 @@ class Image(ItemHandler): A tensor that represents decoded image of self._shape, or (?, ?, self._channels) if self._shape is not specified. """ + def decode_image(): - """Decodes a png or jpg based on the headers.""" - return image_ops.decode_image(image_buffer, self._channels) + """Decodes a image based on the headers.""" + return image_ops.decode_image(image_buffer, channels=self._channels) + + def decode_jpeg(): + """Decodes a jpeg image with specified '_dct_method'.""" + return image_ops.decode_jpeg( + image_buffer, channels=self._channels, dct_method=self._dct_method) + + def check_jpeg(): + """Checks if an image is jpeg.""" + # For jpeg, we directly use image_ops.decode_jpeg rather than decode_image + # in order to feed the jpeg specify parameter 'dct_method'. + return control_flow_ops.cond( + image_ops.is_jpeg(image_buffer), + decode_jpeg, + decode_image, + name='cond_jpeg') def decode_raw(): """Decodes a raw image.""" @@ -420,7 +443,7 @@ class Image(ItemHandler): math_ops.equal(image_format, 'RAW')): decode_raw, } image = control_flow_ops.case( - pred_fn_pairs, default=decode_image, exclusive=True) + pred_fn_pairs, default=check_jpeg, exclusive=True) image.set_shape([None, None, self._channels]) if self._shape is not None: diff --git a/tensorflow/python/ops/image_ops.py b/tensorflow/python/ops/image_ops.py index de12c5f63f..ae52d32fea 100644 --- a/tensorflow/python/ops/image_ops.py +++ b/tensorflow/python/ops/image_ops.py @@ -26,6 +26,7 @@ See the @{$python/image} guide. @@extract_jpeg_shape @@decode_png @@encode_png +@@is_jpeg @@decode_image @@resize_images @@resize_area diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 14a38f25d1..bcd9e5683a 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1339,6 +1339,26 @@ def adjust_saturation(image, saturation_factor, name=None): orig_dtype) +@tf_export('image.is_jpeg') +def is_jpeg(contents, name=None): + r"""Convenience function to check if the 'contents' encodes a JPEG image. + + Args: + contents: 0-D `string`. The encoded image bytes. + name: A name for the operation (optional) + + Returns: + A scalar boolean tensor indicating if 'contents' may be a JPEG image. + is_jpeg is susceptible to false positives. + """ + # Normal JPEGs start with \xff\xd8\xff\xe0 + # JPEG with EXIF stats with \xff\xd8\xff\xe1 + # Use \xff\xd8\xff to cover both. + with ops.name_scope(name, 'is_jpeg'): + substr = string_ops.substr(contents, 0, 3) + return math_ops.equal(substr, b'\xff\xd8\xff', name=name) + + @tf_export('image.decode_image') def decode_image(contents, channels=None, name=None): """Convenience function for `decode_bmp`, `decode_gif`, `decode_jpeg`, @@ -1427,8 +1447,8 @@ def decode_image(contents, channels=None, name=None): # Decode normal JPEG images (start with \xff\xd8\xff\xe0) # as well as JPEG images with EXIF data (start with \xff\xd8\xff\xe1). - is_jpeg = math_ops.equal(substr, b'\xff\xd8\xff', name='is_jpeg') - return control_flow_ops.cond(is_jpeg, _jpeg, check_png, name='cond_jpeg') + return control_flow_ops.cond( + is_jpeg(contents), _jpeg, check_png, name='cond_jpeg') @tf_export('image.total_variation') diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt index baedf596e8..bda1c2bf85 100644 --- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt @@ -100,6 +100,10 @@ tf_module { name: "hsv_to_rgb" argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "is_jpeg" + argspec: "args=[\'contents\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "non_max_suppression" argspec: "args=[\'boxes\', \'scores\', \'max_output_size\', \'iou_threshold\', \'name\'], varargs=None, keywords=None, defaults=[\'0.5\', \'None\'], " -- GitLab From e6ad87898b4ad59598105c3f7cedd7e7fe7e02bc Mon Sep 17 00:00:00 2001 From: MyungsungKwak Date: Tue, 13 Feb 2018 16:17:36 +0900 Subject: [PATCH 109/629] Fix typo in build_and_run_inception_hexagon.sh (#16968) Signed-off-by: MyungSung Kwak --- .../makefile/samples/build_and_run_inception_hexagon.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh b/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh index 203ff4f890..421ddd210f 100755 --- a/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh +++ b/tensorflow/contrib/makefile/samples/build_and_run_inception_hexagon.sh @@ -36,7 +36,7 @@ while getopts "bc:Eps" opt_name; do b) BUILD_ONLY="true";; c) TEST_COUNT="${OPTARG}";; E) ENABLE_EXPERIMENTAL_HEXNN_OPS="true";; - p) USE_PREBUILT_HEXAOGON_BINARIES="true";; + p) USE_PREBUILT_HEXAGON_BINARIES="true";; s) SKIP_DOWNLOAD_IF_EXIST="true";; *) usage;; esac @@ -49,7 +49,7 @@ if [[ -z "${NDK_ROOT}" ]]; then exit 1 fi -if [[ "${USE_PREBUILT_HEXAOGON_BINARIES}" != "true" && +if [[ "${USE_PREBUILT_HEXAGON_BINARIES}" != "true" && -z "${QUALCOMM_SDK}" ]]; then echo "QUALCOMM_SDK is empty" 1>&2 usage @@ -84,7 +84,7 @@ rm -rf "${GEN_DIR}" mkdir -p "${GEN_LIBS_DIR}" mkdir -p "${GEN_DOWNLOAD_DIR}" -if [[ "${USE_PREBUILT_HEXAOGON_BINARIES}" == "true" ]]; then +if [[ "${USE_PREBUILT_HEXAGON_BINARIES}" == "true" ]]; then echo "Download prebuilt hexagon binaries" if [[ "${BUILD_ONLY}" != "true" ]]; then CONTROLLER_PUSH_DEST="/data/local/tmp" -- GitLab From 14d6bcb78f45bd361a8b5eb4a61794f5544f4c24 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Mon, 12 Feb 2018 23:20:15 -0800 Subject: [PATCH 110/629] Disable interleave_dataset_op_test.py and remove a duplicate entry in test blacklist in cmake build. PiperOrigin-RevId: 185488210 --- tensorflow/contrib/cmake/tf_tests.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index f9a7e6e8b9..0a8d691f32 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -276,8 +276,7 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py" # Segfaults on windows "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/dataset_constructor_op_test.py" # Segfaults on Windows. "${tensorflow_source_dir}/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py" - # Broken tensorboard test due to cmake issues. - "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/iterator_ops_cluster_test.py" # Needs portpicker + "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/interleave_dataset_op_test.py" # Deadlocks "${tensorflow_source_dir}/tensorflow/contrib/data/python/kernel_tests/sloppy_transformation_dataset_op_test.py" # b/65430561 # tensor_forest tests (also note that we exclude the hybrid tests for now) "${tensorflow_source_dir}/tensorflow/contrib/tensor_forest/python/kernel_tests/count_extremely_random_stats_op_test.py" # Results in wrong order. -- GitLab From b76b2372bfa6dbdd09e1c6d7e2dff8ef53bc2b1e Mon Sep 17 00:00:00 2001 From: Surya Bhupatiraju Date: Mon, 12 Feb 2018 23:31:38 -0800 Subject: [PATCH 111/629] Add test to ensure that covariance terms of FID is being incorporated meaningfully. PiperOrigin-RevId: 185488757 --- .../eval/python/classifier_metrics_test.py | 68 +++++++++++++++---- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py index 1e18c699ba..61dc8646dd 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py @@ -181,7 +181,8 @@ class ClassifierMetricsTest(test.TestCase): batch_size = 3 img = array_ops.ones([batch_size, 299, 299, 3]) pool = _run_with_mock( - classifier_metrics.run_inception, img, + classifier_metrics.run_inception, + img, output_tensor=classifier_metrics.INCEPTION_FINAL_POOL) self.assertTrue(isinstance(pool, ops.Tensor)) @@ -195,9 +196,12 @@ class ClassifierMetricsTest(test.TestCase): batch_size = 3 img = array_ops.ones([batch_size, 299, 299, 3]) logits, pool = _run_with_mock( - classifier_metrics.run_inception, img, - output_tensor=[classifier_metrics.INCEPTION_OUTPUT, - classifier_metrics.INCEPTION_FINAL_POOL]) + classifier_metrics.run_inception, + img, + output_tensor=[ + classifier_metrics.INCEPTION_OUTPUT, + classifier_metrics.INCEPTION_FINAL_POOL + ]) self.assertTrue(isinstance(logits, ops.Tensor)) self.assertTrue(isinstance(pool, ops.Tensor)) @@ -209,8 +213,10 @@ class ClassifierMetricsTest(test.TestCase): def test_inception_score_graph(self): """Test `inception_score` graph construction.""" - score = _run_with_mock(classifier_metrics.inception_score, - array_ops.zeros([6, 299, 299, 3]), num_batches=3) + score = _run_with_mock( + classifier_metrics.inception_score, + array_ops.zeros([6, 299, 299, 3]), + num_batches=3) self.assertTrue(isinstance(score, ops.Tensor)) score.shape.assert_has_rank(0) @@ -248,12 +254,14 @@ class ClassifierMetricsTest(test.TestCase): array_ops.zeros([8, 10], dtype=dtypes.int32), p_logits, q) with self.assertRaisesRegexp(ValueError, 'must be floating type'): - classifier_metrics._kl_divergence( - p, array_ops.zeros([8, 10], dtype=dtypes.int32), q) + classifier_metrics._kl_divergence(p, + array_ops.zeros( + [8, 10], dtype=dtypes.int32), q) with self.assertRaisesRegexp(ValueError, 'must be floating type'): - classifier_metrics._kl_divergence( - p, p_logits, array_ops.zeros([10], dtype=dtypes.int32)) + classifier_metrics._kl_divergence(p, p_logits, + array_ops.zeros( + [10], dtype=dtypes.int32)) with self.assertRaisesRegexp(ValueError, 'must have rank 2'): classifier_metrics._kl_divergence(array_ops.zeros([8]), p_logits, q) @@ -266,8 +274,9 @@ class ClassifierMetricsTest(test.TestCase): def test_inception_score_value(self): """Test that `inception_score` gives the correct value.""" - logits = np.array([np.array([1, 2] * 500 + [4]), - np.array([4, 5] * 500 + [6])]) + logits = np.array( + [np.array([1, 2] * 500 + [4]), + np.array([4, 5] * 500 + [6])]) unused_image = array_ops.zeros([2, 299, 299, 3]) incscore = _run_with_mock(classifier_metrics.inception_score, unused_image) @@ -285,9 +294,11 @@ class ClassifierMetricsTest(test.TestCase): test_pool_real_a = np.float32(np.random.randn(512, 256)) test_pool_gen_a = np.float32(np.random.randn(512, 256)) - fid_op = _run_with_mock(classifier_metrics.frechet_classifier_distance, - test_pool_real_a, test_pool_gen_a, - classifier_fn=lambda x: x) + fid_op = _run_with_mock( + classifier_metrics.frechet_classifier_distance, + test_pool_real_a, + test_pool_gen_a, + classifier_fn=lambda x: x) with self.test_session() as sess: actual_fid = sess.run(fid_op) @@ -296,6 +307,33 @@ class ClassifierMetricsTest(test.TestCase): self.assertAllClose(expected_fid, actual_fid, 0.0001) + def test_frechet_classifier_distance_covariance(self): + """Test that `frechet_classifier_distance` takes covariance into account.""" + np.random.seed(0) + + # Make num_examples > num_features to ensure scipy's sqrtm function + # doesn't return a complex matrix. + test_pool_reals, test_pool_gens = [], [] + for i in range(1, 11, 2): + test_pool_reals.append(np.float32(np.random.randn(2048, 256) * i)) + test_pool_gens.append(np.float32(np.random.randn(2048, 256) * i)) + + fid_ops = [] + for i in range(len(test_pool_reals)): + fid_ops.append(_run_with_mock( + classifier_metrics.frechet_classifier_distance, + test_pool_reals[i], + test_pool_gens[i], + classifier_fn=lambda x: x)) + + fids = [] + with self.test_session() as sess: + for fid_op in fid_ops: + fids.append(sess.run(fid_op)) + + # Check that the FIDs increase monotonically. + self.assertTrue(all(fid_a < fid_b for fid_a, fid_b in zip(fids, fids[1:]))) + def test_trace_sqrt_product_value(self): """Test that `trace_sqrt_product` gives the correct value.""" np.random.seed(0) -- GitLab From c9da79df105f77264855576760c5ba92b7fe0470 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 13 Feb 2018 00:13:28 -0800 Subject: [PATCH 112/629] Internal change PiperOrigin-RevId: 185491705 --- .../contrib/data/python/kernel_tests/sql_dataset_op_test.py | 1 + tensorflow/contrib/summary/summary_test_internal.py | 1 + tensorflow/contrib/summary/summary_test_util.py | 1 + 3 files changed, 3 insertions(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py index efd864f866..e26cef8ec5 100644 --- a/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/sql_dataset_op_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import os + import sqlite3 from tensorflow.contrib.data.python.ops import readers diff --git a/tensorflow/contrib/summary/summary_test_internal.py b/tensorflow/contrib/summary/summary_test_internal.py index 80f60ae401..d0d3384735 100644 --- a/tensorflow/contrib/summary/summary_test_internal.py +++ b/tensorflow/contrib/summary/summary_test_internal.py @@ -20,6 +20,7 @@ from __future__ import print_function import functools import os + import sqlite3 from tensorflow.contrib.summary import summary_ops diff --git a/tensorflow/contrib/summary/summary_test_util.py b/tensorflow/contrib/summary/summary_test_util.py index bda57e6a0c..8506c4be9c 100644 --- a/tensorflow/contrib/summary/summary_test_util.py +++ b/tensorflow/contrib/summary/summary_test_util.py @@ -21,6 +21,7 @@ from __future__ import print_function import functools import os + import sqlite3 from tensorflow.contrib.summary import summary_ops -- GitLab From a1befe0603418c4a8bc3ea143bd757ac1d5a1fec Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 01:08:22 -0800 Subject: [PATCH 113/629] Mechanical variable renaming to improve consistency. No other changes. Distinguishing between points and vectors: A point refers to a location in the tensor/filter, referred to by channel/row/col. A vector is the difference between two points (mostly 'one_past_the_end - begin'), referred to by depth/height/width. PiperOrigin-RevId: 185496176 --- .../core/kernels/depthwise_conv_op_gpu.cu.cc | 1104 +++++++++-------- 1 file changed, 580 insertions(+), 524 deletions(-) diff --git a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc index 1e9345828a..e9d43f6496 100644 --- a/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthwise_conv_op_gpu.cu.cc @@ -52,13 +52,13 @@ EIGEN_DEVICE_FUNC bool CanLaunchDepthwiseConv2dGPUSmall( // Returns whether depthwise convolution backward filter pass can be performed // using the faster ('Small') variant of the kernel. EIGEN_DEVICE_FUNC bool CanLaunchDepthwiseConv2dBackpropFilterGPUSmall( - const DepthwiseArgs& args, const int block_rows) { + const DepthwiseArgs& args, const int block_height) { return args.depth_multiplier == 1 && args.stride == 1 && args.in_rows <= 32 && args.in_cols <= 32 && args.in_rows == args.out_rows && args.in_cols == args.out_cols && args.pad_rows >= 0 && args.pad_rows < args.filter_rows && args.pad_cols >= 0 && - args.pad_cols < args.filter_cols && block_rows <= args.in_rows && - args.filter_rows * args.filter_cols <= args.in_cols * block_rows; + args.pad_cols < args.filter_cols && block_height <= args.in_rows && + args.filter_rows * args.filter_cols <= args.in_cols * block_height; } // The DepthwiseConv2dGPUKernels perform either forward or backprop input @@ -72,72 +72,81 @@ template (0); - const int input_offset_temp = in_rows * OB; + const int input_offset_temp = in_height * batch; if (input_row_start >= 0 && input_col_start >= 0 && - input_row_end < in_rows && input_col_end < in_cols) { - UNROLL for (int f_r = 0; f_r < filter_rows; ++f_r) { - const int in_r = input_row_start + f_r; - const int filter_offset_temp = filter_cols * f_r; - UNROLL for (int f_c = 0; f_c < filter_cols; ++f_c) { - const int in_c = input_col_start + f_c; + input_row_end < in_height && input_col_end < in_width) { + UNROLL for (int filter_row = 0; filter_row < filter_height; + ++filter_row) { + const int in_row = input_row_start + filter_row; + const int filter_offset_temp = filter_width * filter_row; + UNROLL for (int filter_col = 0; filter_col < filter_width; + ++filter_col) { + const int in_col = input_col_start + filter_col; const int input_offset = - in_d + in_depth * (in_c + in_cols * (in_r + input_offset_temp)); + in_channel + + in_depth * (in_col + in_width * (in_row + input_offset_temp)); const int filter_offset = multiplier + - depth_multiplier * (in_d + in_depth * (f_c + filter_offset_temp)); + depth_multiplier * + (in_channel + in_depth * (filter_col + filter_offset_temp)); sum += ldg(input + input_offset) * ldg(filter + filter_offset); } } } else { - UNROLL for (int f_r = 0; f_r < filter_rows; ++f_r) { - const int in_r = input_row_start + f_r; - const int filter_offset_temp = filter_cols * f_r; - UNROLL for (int f_c = 0; f_c < filter_cols; ++f_c) { - const int in_c = input_col_start + f_c; - if (in_r >= 0 && in_r < in_rows && in_c >= 0 && in_c < in_cols) { - const int in_c = input_col_start + f_c; + UNROLL for (int filter_row = 0; filter_row < filter_height; + ++filter_row) { + const int in_row = input_row_start + filter_row; + const int filter_offset_temp = filter_width * filter_row; + UNROLL for (int filter_col = 0; filter_col < filter_width; + ++filter_col) { + const int in_col = input_col_start + filter_col; + if (in_row >= 0 && in_row < in_height && in_col >= 0 && + in_col < in_width) { + const int in_col = input_col_start + filter_col; const int input_offset = - in_d + in_depth * (in_c + in_cols * (in_r + input_offset_temp)); + in_channel + + in_depth * (in_col + in_width * (in_row + input_offset_temp)); const int filter_offset = - multiplier + depth_multiplier * - (in_d + in_depth * (f_c + filter_offset_temp)); + multiplier + + depth_multiplier * + (in_channel + in_depth * (filter_col + filter_offset_temp)); sum += ldg(input + input_offset) * ldg(filter + filter_offset); } } @@ -157,8 +166,8 @@ __global__ void __launch_bounds__(1024, 2) // Backprop input direction is the same as forward direction with the filter // rotated by 180°. template + int kKnownFilterWidth, int kKnownFilterHeight, int kBlockDepth, + bool kKnownEvenHeight> __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall( const DepthwiseArgs args, const T* input, const T* filter, T* output) { assert(CanLaunchDepthwiseConv2dGPUSmall(args)); @@ -166,45 +175,45 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall( extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[]; T* const shared_data = reinterpret_cast(shared_memory); - const int batches = args.batch; - const int in_rows = args.in_rows; - const int in_cols = args.in_cols; + const int num_batches = args.batch; + const int in_height = args.in_rows; + const int in_width = args.in_cols; const int in_depth = args.in_depth; - const int filter_rows = + const int filter_height = kKnownFilterHeight < 0 ? args.filter_rows : kKnownFilterHeight; - const int filter_cols = + const int filter_width = kKnownFilterWidth < 0 ? args.filter_cols : kKnownFilterWidth; - const int pad_rows = args.pad_rows; - const int pad_cols = args.pad_cols; + const int pad_height = args.pad_rows; + const int pad_width = args.pad_cols; - const int block_rows = blockDim.z; + const int block_height = blockDim.z; // These values are the same for all threads and could // be precomputed on the CPU. - const int block_size = block_rows * in_cols * kBlockSlices; - const int in_row_size = in_cols * in_depth; - const int in_size = in_rows * in_row_size; - const int in_increment = (in_cols - 1) * kBlockSlices; - const int filter_pixels = filter_rows * filter_cols; - const int tile_cols = in_cols + filter_cols - 1; - const int even_rows = kKnownEvenRows || (1 & ~in_rows); - const int tile_rows = in_rows + filter_rows - even_rows; - const int tile_row_size = tile_cols * kBlockSlices; - const int tile_size = tile_rows * tile_row_size; - const int tile_offset = block_rows * tile_row_size; - const int pad_offset = pad_rows * tile_cols + pad_cols; - const int batch_blocks = (in_depth + kBlockSlices - 1) / kBlockSlices; - const int in_blocks = batch_blocks * batches; + const int block_size = block_height * in_width * kBlockDepth; + const int in_row_size = in_width * in_depth; + const int in_size = in_height * in_row_size; + const int in_increment = (in_width - 1) * kBlockDepth; + const int filter_pixels = filter_height * filter_width; + const int tile_width = in_width + filter_width - 1; + const int even_height = kKnownEvenHeight || (1 & ~in_height); + const int tile_height = in_height + filter_height - even_height; + const int tile_row_size = tile_width * kBlockDepth; + const int tile_size = tile_height * tile_row_size; + const int tile_offset = block_height * tile_row_size; + const int pad_offset = pad_height * tile_width + pad_width; + const int batch_blocks = (in_depth + kBlockDepth - 1) / kBlockDepth; + const int in_blocks = batch_blocks * num_batches; const int tensor_offset = - kKnownEvenRows ? in_size / 2 : block_rows * in_row_size; + kKnownEvenHeight ? in_size / 2 : block_height * in_row_size; const int thread_depth = threadIdx.x; const int thread_col = threadIdx.y; const int thread_row = threadIdx.z; // Position in block. - const int thread_pix = thread_row * in_cols + thread_col; - const int thread_idx = thread_pix * kBlockSlices + thread_depth; + const int thread_pix = thread_row * in_width + thread_col; + const int thread_idx = thread_pix * kBlockDepth + thread_depth; // Initialize tile, in particular the padding. for (int i = thread_idx; i < tile_size; i += block_size) { @@ -216,32 +225,32 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall( const int tensor_idx = thread_pix * in_depth + thread_depth; // Position in (padded) shared memory. - const int data_pix = thread_row * tile_cols + thread_col; - const int data_idx = data_pix * kBlockSlices + thread_depth; + const int data_pix = thread_row * tile_width + thread_col; + const int data_idx = data_pix * kBlockDepth + thread_depth; - // Position in shared memory, offset by pad_rows / pad_cols. + // Position in shared memory, offset by pad_height / pad_width. const int tile_pix = data_pix + pad_offset; - const int tile_idx = tile_pix * kBlockSlices + thread_depth; + const int tile_idx = tile_pix * kBlockDepth + thread_depth; - const int max_depth = in_depth - thread_depth; + const int max_channel = in_depth - thread_depth; const int filter_write_offset = thread_pix < filter_pixels ? tile_size + thread_idx : 0; const int filter_read_offset = tile_size + thread_depth + - (kDirection == DIRECTION_FORWARD ? 0 : filter_pixels * kBlockSlices); + (kDirection == DIRECTION_FORWARD ? 0 : filter_pixels * kBlockDepth); const bool skip_second = - !kKnownEvenRows && thread_row + (in_rows & 1) == block_rows; + !kKnownEvenHeight && thread_row + (in_height & 1) == block_height; for (int b = blockIdx.x; b < in_blocks; b += gridDim.x) { const int batch = b / batch_blocks; - const int stack = b - batch * batch_blocks; + const int block = b - batch * batch_blocks; - const int start_depth = stack * kBlockSlices; - const int filter_offset = tensor_idx + start_depth; + const int start_channel = block * kBlockDepth; + const int filter_offset = tensor_idx + start_channel; const int inout_offset = batch * in_size + filter_offset; - const bool depth_in_range = start_depth < max_depth; + const bool channel_in_range = start_channel < max_channel; - if (depth_in_range) { + if (channel_in_range) { const T* const in_ptr = inout_offset + input; T* const tile_ptr = tile_idx + shared_data; tile_ptr[0] = ldg(in_ptr); @@ -257,23 +266,23 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNHWCSmall( // Note: the condition to reach this is uniform across the entire block. __syncthreads(); - if (depth_in_range) { + if (channel_in_range) { T sum1 = static_cast(0); T sum2 = static_cast(0); int shared_offset = data_idx; const T* filter_ptr = filter_read_offset + shared_data; - UNROLL for (int r = 0; r < filter_rows; ++r) { - UNROLL for (int c = 0; c < filter_cols; ++c) { + UNROLL for (int r = 0; r < filter_height; ++r) { + UNROLL for (int c = 0; c < filter_width; ++c) { if (kDirection == DIRECTION_BACKWARD) { - filter_ptr -= kBlockSlices; + filter_ptr -= kBlockDepth; } const T filter_value = *filter_ptr; const T* const tile_ptr = shared_offset + shared_data; sum1 += filter_value * tile_ptr[0]; sum2 += filter_value * tile_ptr[tile_offset]; - shared_offset += kBlockSlices; + shared_offset += kBlockDepth; if (kDirection == DIRECTION_FORWARD) { - filter_ptr += kBlockSlices; + filter_ptr += kBlockDepth; } } shared_offset += in_increment; @@ -297,20 +306,20 @@ template (0); if (input_row_start >= 0 && input_col_start >= 0 && - input_row_end < in_rows && input_col_end < in_cols) { + input_row_end < in_height && input_col_end < in_width) { // Loop that doesn't need to check for boundary conditions. - UNROLL for (int f_r = 0; f_r < filter_rows; ++f_r) { - const int in_r = input_row_start + f_r; - const int filter_offset_temp = filter_cols * f_r; - UNROLL for (int f_c = 0; f_c < filter_cols; ++f_c) { - const int in_c = input_col_start + f_c; + UNROLL for (int filter_row = 0; filter_row < filter_height; + ++filter_row) { + const int in_row = input_row_start + filter_row; + const int filter_offset_temp = filter_width * filter_row; + UNROLL for (int filter_col = 0; filter_col < filter_width; + ++filter_col) { + const int in_col = input_col_start + filter_col; const int input_offset = - (input_offset_temp) + (in_r * in_cols) + in_c; + (input_offset_temp) + (in_row * in_width) + in_col; const int filter_offset = multiplier + - depth_multiplier * (in_d + in_depth * (f_c + filter_offset_temp)); + depth_multiplier * + (in_channel + in_depth * (filter_col + filter_offset_temp)); sum += ldg(input + input_offset) * ldg(filter + filter_offset); } } } else { // Loop that needs to check for boundary conditions. - UNROLL for (int f_r = 0; f_r < filter_rows; ++f_r) { - const int in_r = input_row_start + f_r; - const int filter_offset_temp = filter_cols * f_r; - UNROLL for (int f_c = 0; f_c < filter_cols; ++f_c) { - const int in_c = input_col_start + f_c; - // TODO(vrv): the in_r check can be done outside of this loop; + UNROLL for (int filter_row = 0; filter_row < filter_height; + ++filter_row) { + const int in_row = input_row_start + filter_row; + const int filter_offset_temp = filter_width * filter_row; + UNROLL for (int filter_col = 0; filter_col < filter_width; + ++filter_col) { + const int in_col = input_col_start + filter_col; + // TODO(vrv): the in_row check can be done outside of this loop; // benchmark both methods to determine the better decision. - if (in_r >= 0 && in_r < in_rows && in_c >= 0 && in_c < in_cols) { - const int in_c = input_col_start + f_c; + if (in_row >= 0 && in_row < in_height && in_col >= 0 && + in_col < in_width) { + const int in_col = input_col_start + filter_col; // input_offset_temp indexes into the start of memory // where the spatial data starts. const int input_offset = - (input_offset_temp) + (in_r * in_cols) + in_c; + (input_offset_temp) + (in_row * in_width) + in_col; const int filter_offset = - multiplier + depth_multiplier * - (in_d + in_depth * (f_c + filter_offset_temp)); + multiplier + + depth_multiplier * + (in_channel + in_depth * (filter_col + filter_offset_temp)); sum += ldg(input + input_offset) * ldg(filter + filter_offset); } } @@ -427,8 +444,8 @@ __global__ void __launch_bounds__(1024, 2) // Backprop input direction is the same as forward direction with the filter // rotated by 180°. template + int kKnownFilterWidth, int kKnownFilterHeight, int kBlockDepth, + bool kKnownEvenHeight> __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( const DepthwiseArgs args, const T* input, const T* filter, T* output) { assert(CanLaunchDepthwiseConv2dGPUSmall(args)); @@ -436,43 +453,43 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[]; T* const shared_data = reinterpret_cast(shared_memory); - const int batches = args.batch; - const int in_rows = args.in_rows; - const int in_cols = args.in_cols; + const int num_batches = args.batch; + const int in_height = args.in_rows; + const int in_width = args.in_cols; const int in_depth = args.in_depth; - const int filter_rows = + const int filter_height = kKnownFilterHeight < 0 ? args.filter_rows : kKnownFilterHeight; - const int filter_cols = + const int filter_width = kKnownFilterWidth < 0 ? args.filter_cols : kKnownFilterWidth; - const int pad_rows = args.pad_rows; - const int pad_cols = args.pad_cols; + const int pad_height = args.pad_rows; + const int pad_width = args.pad_cols; // Fixed blockDim.z, tailored for maximum grid size for images of size 16x16. - const int block_rows = blockDim.y; + const int block_height = blockDim.y; // These values are the same for all threads and could // be precomputed on the CPU. - const int block_pixels = in_cols * block_rows; - const int block_size = block_pixels * kBlockSlices; - const int in_pixels = in_cols * in_rows; - const int in_increment = in_cols - 1; - const int filter_pixels = filter_rows * filter_cols; - const int tile_cols = in_cols + filter_cols - 1; - const int even_rows = kKnownEvenRows || (1 & ~in_rows); - const int tile_rows = in_rows + filter_rows - even_rows; - const int tile_pixels = tile_cols * tile_rows; - const int tile_size = tile_pixels * kBlockSlices; - const int tile_offset = block_rows * tile_cols; - const int pad_offset = pad_rows * tile_cols + pad_cols; - const int in_slices = in_depth * batches; - const int in_blocks = (in_slices + kBlockSlices - 1) / kBlockSlices; + const int block_pixels = in_width * block_height; + const int block_size = block_pixels * kBlockDepth; + const int in_pixels = in_width * in_height; + const int in_increment = in_width - 1; + const int filter_pixels = filter_height * filter_width; + const int tile_width = in_width + filter_width - 1; + const int even_height = kKnownEvenHeight || (1 & ~in_height); + const int tile_height = in_height + filter_height - even_height; + const int tile_pixels = tile_width * tile_height; + const int tile_size = tile_pixels * kBlockDepth; + const int tile_offset = block_height * tile_width; + const int pad_offset = pad_height * tile_width + pad_width; + const int in_total_depth = in_depth * num_batches; + const int in_blocks = (in_total_depth + kBlockDepth - 1) / kBlockDepth; const int thread_col = threadIdx.x; const int thread_row = threadIdx.y; const int thread_depth = threadIdx.z; // Position in block. - const int thread_pix = thread_row * in_cols + thread_col; + const int thread_pix = thread_row * in_width + thread_col; const int thread_idx = thread_depth * block_pixels + thread_pix; // Initialize tile, in particular the padding. @@ -485,33 +502,33 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( const int tensor_idx = thread_depth * in_pixels + thread_pix; // Position in (padded) shared memory. - const int data_pix = thread_row * tile_cols + thread_col; + const int data_pix = thread_row * tile_width + thread_col; const int data_idx = thread_depth * tile_pixels + data_pix; - // Position in shared memory, offset by pad_rows / pad_cols. + // Position in shared memory, offset by pad_height / pad_width. const int tile_idx = data_idx + pad_offset; // Filter is always in HWCK format, irrespective of the input/output format. - const int filter_pix = thread_idx / kBlockSlices; - const int filter_depth = thread_idx % kBlockSlices; + const int filter_pix = thread_idx / kBlockDepth; + const int filter_channel = thread_idx % kBlockDepth; const int filter_idx = filter_pix * in_depth; - const int max_slice = in_slices - thread_depth; + const int max_channel = in_total_depth - thread_depth; const int filter_write_offset = filter_pix < filter_pixels ? tile_size + thread_idx : 0; const int filter_read_offset = tile_size + thread_depth + - (kDirection == DIRECTION_FORWARD ? 0 : filter_pixels * kBlockSlices); + (kDirection == DIRECTION_FORWARD ? 0 : filter_pixels * kBlockDepth); const bool skip_second = - !kKnownEvenRows && thread_row + (in_rows & 1) == block_rows; + !kKnownEvenHeight && thread_row + (in_height & 1) == block_height; for (int b = blockIdx.x; b < in_blocks; b += gridDim.x) { - const int slice = b * kBlockSlices; + const int channel = b * kBlockDepth; - const int inout_offset = slice * in_pixels + tensor_idx; - const bool slice_in_range = slice < max_slice; + const int inout_offset = channel * in_pixels + tensor_idx; + const bool channel_in_range = channel < max_channel; - if (slice_in_range) { + if (channel_in_range) { const T* const in_ptr = inout_offset + input; T* const tile_ptr = tile_idx + shared_data; tile_ptr[0] = ldg(in_ptr); @@ -521,22 +538,23 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( } if (filter_write_offset != 0) { - const int filter_offset = filter_idx + (slice + filter_depth) % in_depth; + const int filter_offset = + filter_idx + (channel + filter_channel) % in_depth; shared_data[filter_write_offset] = ldg(filter_offset + filter); } // Note: the condition to reach this is uniform across the entire block. __syncthreads(); - if (slice_in_range) { + if (channel_in_range) { T sum1 = static_cast(0); T sum2 = static_cast(0); int shared_offset = data_idx; const T* filter_ptr = filter_read_offset + shared_data; - UNROLL for (int r = 0; r < filter_rows; ++r) { - UNROLL for (int c = 0; c < filter_cols; ++c) { + UNROLL for (int r = 0; r < filter_height; ++r) { + UNROLL for (int c = 0; c < filter_width; ++c) { if (kDirection == DIRECTION_BACKWARD) { - filter_ptr -= kBlockSlices; + filter_ptr -= kBlockDepth; } const T filter_value = *filter_ptr; const T* const tile_ptr = shared_offset + shared_data; @@ -544,7 +562,7 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( sum2 += filter_value * tile_ptr[tile_offset]; ++shared_offset; if (kDirection == DIRECTION_FORWARD) { - filter_ptr += kBlockSlices; + filter_ptr += kBlockDepth; } } shared_offset += in_increment; @@ -562,89 +580,90 @@ __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dGPUKernelNCHWSmall( } template -void LaunchDepthwiseConv2dGPUSmall(const GpuDevice& d, + int kKnownFilterWidth, int kKnownFilterHeight, int kBlockDepth, + bool kKnownEvenHeight> +void LaunchDepthwiseConv2dGPUSmall(const GpuDevice& device, const DepthwiseArgs& args, const T* input, const T* filter, T* output, TensorFormat data_format) { - const int block_rows = (args.in_rows + 1) / 2; + const int block_height = (args.in_rows + 1) / 2; dim3 block_dim; void (*kernel)(const DepthwiseArgs, const T*, const T*, T*); if (data_format == FORMAT_NHWC) { - block_dim = dim3(kBlockSlices, args.in_cols, block_rows); + block_dim = dim3(kBlockDepth, args.in_cols, block_height); kernel = DepthwiseConv2dGPUKernelNHWCSmall; + kKnownFilterHeight, kBlockDepth, + kKnownEvenHeight>; } else if (data_format == FORMAT_NCHW) { - block_dim = dim3(args.in_cols, block_rows, kBlockSlices); + block_dim = dim3(args.in_cols, block_height, kBlockDepth); kernel = DepthwiseConv2dGPUKernelNCHWSmall; + kKnownFilterHeight, kBlockDepth, + kKnownEvenHeight>; } else { assert(false && "Incorrect data format"); return; } - const int tile_cols = args.in_cols + args.filter_cols - 1; - const int tile_rows = block_rows * 2 + args.filter_rows - 1; - const int tile_pixels = tile_rows * tile_cols; + const int tile_width = args.in_cols + args.filter_cols - 1; + const int tile_height = block_height * 2 + args.filter_rows - 1; + const int tile_pixels = tile_height * tile_width; const int filter_pixels = args.filter_rows * args.filter_cols; const int shared_memory_size = - kBlockSlices * (tile_pixels + filter_pixels) * sizeof(T); + kBlockDepth * (tile_pixels + filter_pixels) * sizeof(T); const int num_outputs = args.batch * args.out_rows * args.out_cols * args.out_depth; CudaLaunchConfig config = - GetCudaLaunchConfig(num_outputs, d, kernel, shared_memory_size, + GetCudaLaunchConfig(num_outputs, device, kernel, shared_memory_size, block_dim.x * block_dim.y * block_dim.z); - kernel<<>>( - args, input, filter, output); + kernel<<>>(args, input, filter, output); } template -void LaunchDepthwiseConv2dGPUSmall(const GpuDevice& d, + int kKnownFilterWidth, int kKnownFilterHeight, int kBlockDepth> +void LaunchDepthwiseConv2dGPUSmall(const GpuDevice& device, const DepthwiseArgs& args, const T* input, const T* filter, T* output, TensorFormat data_format) { if (args.in_rows & 1) { LaunchDepthwiseConv2dGPUSmall( - d, args, input, filter, output, data_format); + kKnownFilterHeight, kBlockDepth, false>( + device, args, input, filter, output, data_format); } else { LaunchDepthwiseConv2dGPUSmall( - d, args, input, filter, output, data_format); + kKnownFilterHeight, kBlockDepth, true>( + device, args, input, filter, output, data_format); } } template -void LaunchDepthwiseConv2dGPUSmall(const GpuDevice& d, +void LaunchDepthwiseConv2dGPUSmall(const GpuDevice& device, const DepthwiseArgs& args, const T* input, const T* filter, T* output, TensorFormat data_format) { - // Maximize (power of two) kBlockSlices while keeping a block within 1024 + // Maximize (power of two) kBlockDepth while keeping a block within 1024 // threads (2 pixels per thread). const int block_pixels = (args.in_rows + 1) / 2 * args.in_cols; if (block_pixels > 256) { LaunchDepthwiseConv2dGPUSmall(d, args, input, filter, - output, data_format); + kKnownFilterHeight, 2>( + device, args, input, filter, output, data_format); } else if (block_pixels > 128) { LaunchDepthwiseConv2dGPUSmall(d, args, input, filter, - output, data_format); + kKnownFilterHeight, 4>( + device, args, input, filter, output, data_format); } else { LaunchDepthwiseConv2dGPUSmall(d, args, input, filter, - output, data_format); + kKnownFilterHeight, 8>( + device, args, input, filter, output, data_format); } } template -void LaunchDepthwiseConv2dGPU(const GpuDevice& d, const DepthwiseArgs& args, - const T* input, const T* filter, T* output, +void LaunchDepthwiseConv2dGPU(const GpuDevice& device, + const DepthwiseArgs& args, const T* input, + const T* filter, T* output, TensorFormat data_format) { void (*kernel)(const DepthwiseArgs, const T*, const T*, T*, int); if (data_format == FORMAT_NHWC) { @@ -661,34 +680,36 @@ void LaunchDepthwiseConv2dGPU(const GpuDevice& d, const DepthwiseArgs& args, } const int num_outputs = args.batch * args.out_rows * args.out_cols * args.out_depth; - CudaLaunchConfig config = GetCudaLaunchConfig(num_outputs, d, kernel, 0, 0); + CudaLaunchConfig config = + GetCudaLaunchConfig(num_outputs, device, kernel, 0, 0); // The compile-time constant version runs faster with a single block. const int max_block_count = kKnownFilterWidth < 0 || kKnownFilterHeight < 0 || kKnownDepthMultiplier < 0 ? std::numeric_limits::max() - : d.getNumCudaMultiProcessors(); + : device.getNumCudaMultiProcessors(); kernel<<>>(args, input, filter, - output, num_outputs); + config.thread_per_block, 0, device.stream()>>>(args, input, filter, + output, num_outputs); } template -void LaunchDepthwiseConv2dGPU(const GpuDevice& d, const DepthwiseArgs& args, - const T* input, const T* filter, T* output, +void LaunchDepthwiseConv2dGPU(const GpuDevice& device, + const DepthwiseArgs& args, const T* input, + const T* filter, T* output, TensorFormat data_format) { if (args.depth_multiplier == 1) { if (CanLaunchDepthwiseConv2dGPUSmall(args)) { LaunchDepthwiseConv2dGPUSmall(d, args, input, filter, - output, data_format); + kKnownFilterHeight>( + device, args, input, filter, output, data_format); return; } LaunchDepthwiseConv2dGPU( - d, args, input, filter, output, data_format); + device, args, input, filter, output, data_format); } else { LaunchDepthwiseConv2dGPU( - d, args, input, filter, output, data_format); + device, args, input, filter, output, data_format); } } @@ -699,12 +720,12 @@ void LaunchDepthwiseConvOp::operator()(OpKernelContext* ctx, const T* input, const T* filter, T* output, TensorFormat data_format) { - const GpuDevice& d = ctx->eigen_device(); + const GpuDevice& device = ctx->eigen_device(); if (args.filter_rows == 3 && args.filter_cols == 3) { - LaunchDepthwiseConv2dGPU(d, args, input, filter, output, + LaunchDepthwiseConv2dGPU(device, args, input, filter, output, data_format); } else { - LaunchDepthwiseConv2dGPU(d, args, input, filter, output, + LaunchDepthwiseConv2dGPU(device, args, input, filter, output, data_format); } auto stream = ctx->op_device_context()->stream(); @@ -725,59 +746,65 @@ __global__ void __launch_bounds__(640, 2) const T* out_backprop, const T* filter, T* in_backprop, int num_in_backprop) { - const int in_rows = args.in_rows; - const int in_cols = args.in_cols; + const int in_height = args.in_rows; + const int in_width = args.in_cols; const int in_depth = args.in_depth; - const int filter_rows = + const int filter_height = kKnownFilterHeight < 0 ? args.filter_rows : kKnownFilterHeight; - const int filter_cols = + const int filter_width = kKnownFilterWidth < 0 ? args.filter_cols : kKnownFilterWidth; const int depth_multiplier = kKnownDepthMultiplier < 0 ? args.depth_multiplier : kKnownDepthMultiplier; const int stride = args.stride; - const int pad_rows = args.pad_rows; - const int pad_cols = args.pad_cols; - const int out_rows = args.out_rows; - const int out_cols = args.out_cols; + const int pad_height = args.pad_rows; + const int pad_width = args.pad_cols; + const int out_height = args.out_rows; + const int out_width = args.out_cols; const int out_depth = args.out_depth; CUDA_1D_KERNEL_LOOP(thread_id, num_in_backprop) { // Compute the indexes of this thread in the output. - const int in_d = thread_id % in_depth; - const int in_c = (thread_id / in_depth) % in_cols; - const int in_r = (thread_id / in_depth / in_cols) % in_rows; - const int b = thread_id / in_depth / in_cols / in_rows; + const int in_channel = thread_id % in_depth; + const int in_col = (thread_id / in_depth) % in_width; + const int in_row = (thread_id / in_depth / in_width) % in_height; + const int batch = thread_id / in_depth / in_width / in_height; T sum = static_cast(0); - const int out_r_start = - tf_max(0, (in_r - filter_rows + pad_rows + stride) / stride); - const int out_r_end = tf_min(out_rows - 1, (in_r + pad_rows) / stride); - const int out_c_start = - tf_max(0, (in_c - filter_cols + pad_cols + stride) / stride); - const int out_c_end = tf_min(out_cols - 1, (in_c + pad_cols) / stride); - - NOUNROLL for (int out_r = out_r_start; out_r <= out_r_end; ++out_r) { - const int f_r = in_r + pad_rows - out_r * stride; + const int out_row_start = + tf_max(0, (in_row - filter_height + pad_height + stride) / stride); + const int out_row_end = + tf_min(out_height - 1, (in_row + pad_height) / stride); + const int out_col_start = + tf_max(0, (in_col - filter_width + pad_width + stride) / stride); + const int out_col_end = + tf_min(out_width - 1, (in_col + pad_width) / stride); + + NOUNROLL for (int out_row = out_row_start; out_row <= out_row_end; + ++out_row) { + const int filter_row = in_row + pad_height - out_row * stride; const int temp_out_backprop_offset = - out_depth * out_cols * (out_r + out_rows * b); - const int temp_filter_offset = filter_cols * f_r; - NOUNROLL for (int out_c = out_c_start; out_c <= out_c_end; ++out_c) { - const int f_c = in_c + pad_cols - out_c * stride; + out_depth * out_width * (out_row + out_height * batch); + const int temp_filter_offset = filter_width * filter_row; + NOUNROLL for (int out_col = out_col_start; out_col <= out_col_end; + ++out_col) { + const int filter_col = in_col + pad_width - out_col * stride; int filter_offset = - depth_multiplier * (in_d + in_depth * (f_c + temp_filter_offset)); + depth_multiplier * + (in_channel + in_depth * (filter_col + temp_filter_offset)); const int out_backprop_offset = - out_depth * out_c + temp_out_backprop_offset; + out_depth * out_col + temp_out_backprop_offset; #pragma unroll 6 for (int i = 0; i < depth_multiplier; ++i) { sum += ldg(out_backprop + out_backprop_offset + - in_d * depth_multiplier + i) * + in_channel * depth_multiplier + i) * ldg(filter + filter_offset + i); } } } const int in_backprop_offset = - in_d + in_depth * (in_c + in_cols * (in_r + in_rows * b)); + in_channel + + in_depth * (in_col + in_width * (in_row + in_height * batch)); in_backprop[in_backprop_offset] = sum; } } @@ -789,73 +816,79 @@ __global__ void __launch_bounds__(640, 2) const T* out_backprop, const T* filter, T* in_backprop, int num_in_backprop) { - const int in_rows = args.in_rows; - const int in_cols = args.in_cols; + const int in_height = args.in_rows; + const int in_width = args.in_cols; const int in_depth = args.in_depth; - const int filter_rows = + const int filter_height = kKnownFilterHeight < 0 ? args.filter_rows : kKnownFilterHeight; - const int filter_cols = + const int filter_width = kKnownFilterWidth < 0 ? args.filter_cols : kKnownFilterWidth; const int depth_multiplier = kKnownDepthMultiplier < 0 ? args.depth_multiplier : kKnownDepthMultiplier; const int stride = args.stride; - const int pad_rows = args.pad_rows; - const int pad_cols = args.pad_cols; - const int out_rows = args.out_rows; - const int out_cols = args.out_cols; + const int pad_height = args.pad_rows; + const int pad_width = args.pad_cols; + const int out_height = args.out_rows; + const int out_width = args.out_cols; const int out_depth = args.out_depth; // TODO(vrv): Consider assigning threads to output and using // atomics for accumulation, similar to the filter case. CUDA_1D_KERNEL_LOOP(thread_id, num_in_backprop) { // Compute the indexes of this thread in the input. - const int in_c = thread_id % in_cols; - const int in_r = (thread_id / in_cols) % in_rows; - const int in_d = (thread_id / in_cols / in_rows) % in_depth; - const int b = thread_id / in_depth / in_cols / in_rows; + const int in_col = thread_id % in_width; + const int in_row = (thread_id / in_width) % in_height; + const int in_channel = (thread_id / in_width / in_height) % in_depth; + const int batch = thread_id / in_depth / in_width / in_height; T sum = static_cast(0); - const int out_d_start = in_d * depth_multiplier; - const int out_d_end = out_d_start + depth_multiplier; - - const int out_r_start = - tf_max(0, (in_r - filter_rows + pad_rows + stride) / stride); - const int out_r_end = tf_min(out_rows - 1, (in_r + pad_rows) / stride); - const int out_c_start = - tf_max(0, (in_c - filter_cols + pad_cols + stride) / stride); - const int out_c_end = tf_min(out_cols - 1, (in_c + pad_cols) / stride); - - UNROLL for (int out_d = out_d_start; out_d < out_d_end; ++out_d) { - UNROLL for (int out_r = out_r_start; out_r <= out_r_end; ++out_r) { - const int f_r = in_r + pad_rows - out_r * stride; - const int filter_dm = out_d - out_d_start; - - const int temp_filter_offset = filter_cols * f_r; - for (int out_c = out_c_start; out_c <= out_c_end; ++out_c) { - const int f_c = in_c + pad_cols - out_c * stride; + const int out_channel_start = in_channel * depth_multiplier; + const int out_channel_end = out_channel_start + depth_multiplier; + + const int out_row_start = + tf_max(0, (in_row - filter_height + pad_height + stride) / stride); + const int out_row_end = + tf_min(out_height - 1, (in_row + pad_height) / stride); + const int out_col_start = + tf_max(0, (in_col - filter_width + pad_width + stride) / stride); + const int out_col_end = + tf_min(out_width - 1, (in_col + pad_width) / stride); + + UNROLL for (int out_channel = out_channel_start; + out_channel < out_channel_end; ++out_channel) { + UNROLL for (int out_row = out_row_start; out_row <= out_row_end; + ++out_row) { + const int filter_row = in_row + pad_height - out_row * stride; + const int filter_dm = out_channel - out_channel_start; + + const int temp_filter_offset = filter_width * filter_row; + for (int out_col = out_col_start; out_col <= out_col_end; ++out_col) { + const int filter_col = in_col + pad_width - out_col * stride; const int filter_offset = - filter_dm + args.depth_multiplier * - (in_d + in_depth * (f_c + temp_filter_offset)); + filter_dm + + args.depth_multiplier * + (in_channel + in_depth * (filter_col + temp_filter_offset)); const int out_backprop_offset = - (b * out_depth * out_rows * out_cols) + - (out_d * out_rows * out_cols) + (out_r * out_cols) + (out_c); + (batch * out_depth * out_height * out_width) + + (out_channel * out_height * out_width) + (out_row * out_width) + + (out_col); sum += ldg(out_backprop + out_backprop_offset) * ldg(filter + filter_offset); } } } - const int in_backprop_offset = (b * in_rows * in_cols * in_depth) + - (in_d * in_rows * in_cols) + - (in_r * in_cols) + (in_c); + const int in_backprop_offset = (batch * in_height * in_width * in_depth) + + (in_channel * in_height * in_width) + + (in_row * in_width) + (in_col); in_backprop[in_backprop_offset] = sum; } } template -void LaunchDepthwiseConv2dBackpropInputGPU(const GpuDevice& d, +void LaunchDepthwiseConv2dBackpropInputGPU(const GpuDevice& device, const DepthwiseArgs& args, const T* out_backprop, const T* filter, T* in_backprop, @@ -874,13 +907,13 @@ void LaunchDepthwiseConv2dBackpropInputGPU(const GpuDevice& d, const int num_in_backprop = args.batch * args.in_rows * args.in_cols * args.in_depth; CudaLaunchConfig config = - GetCudaLaunchConfig(num_in_backprop, d, kernel, 0, 0); - kernel<<>>( + GetCudaLaunchConfig(num_in_backprop, device, kernel, 0, 0); + kernel<<>>( args, out_backprop, filter, in_backprop, num_in_backprop); } template -void LaunchDepthwiseConv2dBackpropInputGPU(const GpuDevice& d, +void LaunchDepthwiseConv2dBackpropInputGPU(const GpuDevice& device, const DepthwiseArgs& args, const T* out_backprop, const T* filter, T* in_backprop, @@ -889,17 +922,17 @@ void LaunchDepthwiseConv2dBackpropInputGPU(const GpuDevice& d, if (CanLaunchDepthwiseConv2dGPUSmall(args)) { LaunchDepthwiseConv2dGPUSmall( - d, args, out_backprop, filter, in_backprop, data_format); + device, args, out_backprop, filter, in_backprop, data_format); return; } LaunchDepthwiseConv2dBackpropInputGPU( - d, args, out_backprop, filter, in_backprop, data_format); + device, args, out_backprop, filter, in_backprop, data_format); } else { LaunchDepthwiseConv2dBackpropInputGPU( - d, args, out_backprop, filter, in_backprop, data_format); + device, args, out_backprop, filter, in_backprop, data_format); } } @@ -908,13 +941,13 @@ template void LaunchDepthwiseConvBackpropInputOp::operator()( OpKernelContext* ctx, const DepthwiseArgs& args, const T* out_backprop, const T* filter, T* in_backprop, TensorFormat data_format) { - const GpuDevice& d = ctx->eigen_device(); + const GpuDevice& device = ctx->eigen_device(); if (args.filter_rows == 3 && args.filter_cols == 3) { LaunchDepthwiseConv2dBackpropInputGPU( - d, args, out_backprop, filter, in_backprop, data_format); + device, args, out_backprop, filter, in_backprop, data_format); } else { LaunchDepthwiseConv2dBackpropInputGPU( - d, args, out_backprop, filter, in_backprop, data_format); + device, args, out_backprop, filter, in_backprop, data_format); } auto stream = ctx->op_device_context()->stream(); OP_REQUIRES(ctx, stream->ok(), @@ -936,75 +969,85 @@ __global__ void __launch_bounds__(640, 2) const T* input, T* filter_backprop, int num_out_backprop) { - const int in_rows = args.in_rows; - const int in_cols = args.in_cols; + const int in_height = args.in_rows; + const int in_width = args.in_cols; const int in_depth = args.in_depth; - const int filter_rows = + const int filter_height = kKnownFilterHeight < 0 ? args.filter_rows : kKnownFilterHeight; - const int filter_cols = + const int filter_width = kKnownFilterWidth < 0 ? args.filter_cols : kKnownFilterWidth; const int depth_multiplier = kKnownDepthMultiplier < 0 ? args.depth_multiplier : kKnownDepthMultiplier; const int stride = args.stride; - const int pad_rows = args.pad_rows; - const int pad_cols = args.pad_cols; - const int out_rows = args.out_rows; - const int out_cols = args.out_cols; + const int pad_height = args.pad_rows; + const int pad_width = args.pad_cols; + const int out_height = args.out_rows; + const int out_width = args.out_cols; const int out_depth = args.out_depth; CUDA_1D_KERNEL_LOOP(thread_id, num_out_backprop) { // Compute the indexes of this thread in the output. - const int out_d = thread_id % out_depth; - const int out_c = (thread_id / out_depth) % out_cols; - const int out_r = (thread_id / out_depth / out_cols) % out_rows; - const int b = thread_id / out_depth / out_cols / out_rows; + const int out_channel = thread_id % out_depth; + const int out_col = (thread_id / out_depth) % out_width; + const int out_row = (thread_id / out_depth / out_width) % out_height; + const int batch = thread_id / out_depth / out_width / out_height; // Compute the input depth and the index of depth multiplier. - const int in_d = out_d / depth_multiplier; - const int dm = out_d % depth_multiplier; + const int in_channel = out_channel / depth_multiplier; + const int dm = out_channel % depth_multiplier; // Decide if all input is valid, if yes, we can skip the boundary checks // for each input. - const int in_r_start = out_r * stride - pad_rows; - const int in_c_start = out_c * stride - pad_cols; - const int in_r_end = in_r_start + filter_rows; - const int in_c_end = in_c_start + filter_cols; + const int in_row_start = out_row * stride - pad_height; + const int in_col_start = out_col * stride - pad_width; + const int in_row_end = in_row_start + filter_height; + const int in_col_end = in_col_start + filter_width; const int out_backprop_offset = - out_d + out_depth * (out_c + out_cols * (out_r + out_rows * b)); + out_channel + + out_depth * (out_col + out_width * (out_row + out_height * batch)); const T out_bp = ldg(out_backprop + out_backprop_offset); - if (in_r_start >= 0 && in_c_start >= 0 && in_r_end < in_rows && - in_c_end < in_cols) { - UNROLL for (int f_r = 0; f_r < filter_rows; ++f_r) { - const int in_r = in_r_start + f_r; + if (in_row_start >= 0 && in_col_start >= 0 && in_row_end < in_height && + in_col_end < in_width) { + UNROLL for (int filter_row = 0; filter_row < filter_height; + ++filter_row) { + const int in_row = in_row_start + filter_row; // Avoid repeated computation. - const int input_offset_temp = in_cols * (in_r + in_rows * b); - UNROLL for (int f_c = 0; f_c < filter_cols; ++f_c) { - const int in_c = in_c_start + f_c; + const int input_offset_temp = in_width * (in_row + in_height * batch); + UNROLL for (int filter_col = 0; filter_col < filter_width; + ++filter_col) { + const int in_col = in_col_start + filter_col; - const int input_offset = in_d + in_depth * (in_c + input_offset_temp); + const int input_offset = + in_channel + in_depth * (in_col + input_offset_temp); T partial_sum = ldg(input + input_offset) * out_bp; - T* addr = filter_backprop + - (dm + depth_multiplier * - (in_d + in_depth * (f_c + filter_cols * f_r))); + T* addr = + filter_backprop + + (dm + depth_multiplier * + (in_channel + + in_depth * (filter_col + filter_width * filter_row))); CudaAtomicAdd(addr, partial_sum); } } } else { - UNROLL for (int f_r = 0; f_r < filter_rows; ++f_r) { - const int in_r = in_r_start + f_r; + UNROLL for (int filter_row = 0; filter_row < filter_height; + ++filter_row) { + const int in_row = in_row_start + filter_row; // Avoid repeated computation. - const int input_offset_temp = in_cols * (in_r + in_rows * b); - UNROLL for (int f_c = 0; f_c < filter_cols; ++f_c) { - const int in_c = in_c_start + f_c; - const int addr_temp = filter_cols * f_r; - - if (in_r >= 0 && in_r < in_rows && in_c >= 0 && in_c < in_cols) { + const int input_offset_temp = in_width * (in_row + in_height * batch); + UNROLL for (int filter_col = 0; filter_col < filter_width; + ++filter_col) { + const int in_col = in_col_start + filter_col; + const int addr_temp = filter_width * filter_row; + + if (in_row >= 0 && in_row < in_height && in_col >= 0 && + in_col < in_width) { const int input_offset = - in_d + in_depth * (in_c + input_offset_temp); + in_channel + in_depth * (in_col + input_offset_temp); T partial_sum = ldg(input + input_offset) * out_bp; T* addr = filter_backprop + - (dm + depth_multiplier * (in_d + in_depth * (f_c + addr_temp))); + (dm + depth_multiplier * + (in_channel + in_depth * (filter_col + addr_temp))); // Potentially many threads can add to the same address so we have // to use atomic add here. // TODO(jmchen): If atomic add turns out to be slow, we can: @@ -1048,9 +1091,9 @@ __device__ __forceinline__ T WarpSumReduce(T val) { // memory are warp-accumulated (in chunks of kAccumPixels elements) and summed // up in global memory using atomics. // Requirements: threads per block must be multiple of 32 and <= launch_bounds, -// kAccumPixels * 64 >= args.in_rows * args.in_cols * kBlockSlices. +// kAccumPixels * 64 >= args.in_rows * args.in_cols * kBlockDepth. template + int kBlockDepth, int kAccumPixels> __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall( const DepthwiseArgs args, const T* output, const T* input, T* filter) { @@ -1059,40 +1102,40 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall( extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[]; T* const shared_data = reinterpret_cast(shared_memory); - const int batches = args.batch; - const int in_rows = args.in_rows; - const int in_cols = blockDim.y; // slower (see b/62280718): args.in_cols; + const int num_batches = args.batch; + const int in_height = args.in_rows; + const int in_width = blockDim.y; // slower (see b/62280718): args.in_cols; const int in_depth = args.in_depth; - const int filter_rows = + const int filter_height = kKnownFilterHeight < 0 ? args.filter_rows : kKnownFilterHeight; - const int filter_cols = + const int filter_width = kKnownFilterWidth < 0 ? args.filter_cols : kKnownFilterWidth; - const int pad_rows = args.pad_rows; - const int pad_cols = args.pad_cols; + const int pad_height = args.pad_rows; + const int pad_width = args.pad_cols; - const int block_rows = blockDim.z; + const int block_height = blockDim.z; // These values are the same for all threads and could // be precomputed on the CPU. - const int block_size = block_rows * in_cols * kBlockSlices; + const int block_size = block_height * in_width * kBlockDepth; assert((block_size & 31) == 0); - const int in_row_size = in_cols * in_depth; - const int in_size = in_rows * in_row_size; - const int in_increment = (in_cols - 1) * kBlockSlices; - const int filter_pixels = filter_rows * filter_cols; - const int tile_cols = in_cols + filter_cols - 1; - const int tile_rows = 2 * block_rows + filter_rows - 1; - const int tile_row_size = tile_cols * kBlockSlices; - const int tile_size = tile_rows * tile_row_size; - const int tile_offset = block_rows * tile_row_size; - const int pad_offset = pad_rows * tile_cols + pad_cols; - const int batch_blocks = (in_depth + kBlockSlices - 1) / kBlockSlices; - const int in_blocks = batch_blocks * batches; - const int tensor_offset = block_rows * in_row_size; + const int in_row_size = in_width * in_depth; + const int in_size = in_height * in_row_size; + const int in_increment = (in_width - 1) * kBlockDepth; + const int filter_pixels = filter_height * filter_width; + const int tile_width = in_width + filter_width - 1; + const int tile_height = 2 * block_height + filter_height - 1; + const int tile_row_size = tile_width * kBlockDepth; + const int tile_size = tile_height * tile_row_size; + const int tile_offset = block_height * tile_row_size; + const int pad_offset = pad_height * tile_width + pad_width; + const int batch_blocks = (in_depth + kBlockDepth - 1) / kBlockDepth; + const int in_blocks = batch_blocks * num_batches; + const int tensor_offset = block_height * in_row_size; // The accumulator has a fixed number of pixels that can be reduced by one - // warp. Pixels beyond ceil(in_pixels * kBlockSlices / 64) are never written. - assert(kAccumPixels * 64 >= in_rows * in_cols * kBlockSlices); - const int accum_increment = kAccumPixels * kBlockSlices; + // warp. Pixels beyond ceil(in_pixels * kBlockDepth / 64) are never written. + assert(kAccumPixels * 64 >= in_height * in_width * kBlockDepth); + const int accum_increment = kAccumPixels * kBlockDepth; const int accum_size = filter_pixels * accum_increment; const int thread_depth = threadIdx.x; @@ -1100,8 +1143,8 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall( const int thread_row = threadIdx.z; // Position in block. - const int thread_pix = thread_row * in_cols + thread_col; - const int thread_idx = thread_pix * kBlockSlices + thread_depth; + const int thread_pix = thread_row * in_width + thread_col; + const int thread_idx = thread_pix * kBlockDepth + thread_depth; // Initialize tile, in particular the padding and accumulator. for (int i = thread_idx; i < tile_size + accum_size; i += block_size) { @@ -1113,31 +1156,31 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall( const int tensor_idx = thread_pix * in_depth + thread_depth; // Position in (padded) shared memory. - const int data_pix = thread_row * tile_cols + thread_col; - const int data_idx = data_pix * kBlockSlices + thread_depth; + const int data_pix = thread_row * tile_width + thread_col; + const int data_idx = data_pix * kBlockDepth + thread_depth; - // Position in shared memory, offset by pad_rows / pad_cols. + // Position in shared memory, offset by pad_height / pad_width. const int tile_pix = data_pix + pad_offset; - const int tile_idx = tile_pix * kBlockSlices + thread_depth; + const int tile_idx = tile_pix * kBlockDepth + thread_depth; - // Position in accumulator (kBlockSlices per warp, depth major). - const int accum_pix = thread_pix / (32 / kBlockSlices); + // Position in accumulator (kBlockDepth per warp, depth major). + const int accum_pix = thread_pix / (32 / kBlockDepth); const int accum_idx = thread_depth * kAccumPixels + accum_pix; - const int max_depth = in_depth - thread_depth; + const int max_channel = in_depth - thread_depth; const int accum_offset = tile_size + accum_idx; - const bool skip_second = block_rows + thread_row >= in_rows; + const bool skip_second = block_height + thread_row >= in_height; for (int b = blockIdx.x; b < in_blocks; b += gridDim.x) { const int batch = b / batch_blocks; - const int stack = b - batch * batch_blocks; + const int block = b - batch * batch_blocks; - const int start_depth = stack * kBlockSlices; - const int filter_offset = tensor_idx + start_depth; + const int start_channel = block * kBlockDepth; + const int filter_offset = tensor_idx + start_channel; const int inout_offset = batch * in_size + filter_offset; - const bool depth_in_range = start_depth < max_depth; + const bool channel_in_range = start_channel < max_channel; - if (depth_in_range) { + if (channel_in_range) { const T* const in_ptr = inout_offset + input; T* const tile_ptr = tile_idx + shared_data; tile_ptr[0] = ldg(in_ptr); @@ -1148,26 +1191,26 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall( // Note: the condition to reach this is uniform across the entire block. __syncthreads(); - unsigned active_threads = CudaBallotSync(kCudaWarpAll, depth_in_range); + unsigned active_threads = CudaBallotSync(kCudaWarpAll, channel_in_range); - if (depth_in_range) { + if (channel_in_range) { const T* const out_ptr = inout_offset + output; const T out1 = ldg(out_ptr); const T out2 = skip_second ? T(0) : ldg(tensor_offset + out_ptr); int shared_offset = data_idx; T* accum_ptr = accum_offset + shared_data; - UNROLL for (int r = 0; r < filter_rows; ++r) { - UNROLL for (int c = 0; c < filter_cols; ++c) { + UNROLL for (int r = 0; r < filter_height; ++r) { + UNROLL for (int c = 0; c < filter_width; ++c) { const T* const tile_ptr = shared_offset + shared_data; T val = out1 * tile_ptr[0] + out2 * tile_ptr[tile_offset]; // Warp-accumulate pixels of the same depth and write to accumulator. - for (int delta = 16; delta >= kBlockSlices; delta /= 2) { + for (int delta = 16; delta >= kBlockDepth; delta /= 2) { val += CudaShuffleXorSync(active_threads, val, delta); } - if (!(thread_idx & 32 - kBlockSlices) /* lane_idx < kBlockSlices */) { + if (!(thread_idx & 32 - kBlockDepth) /* lane_idx < kBlockDepth */) { *accum_ptr = val; } - shared_offset += kBlockSlices; + shared_offset += kBlockDepth; accum_ptr += accum_increment; } shared_offset += in_increment; @@ -1180,10 +1223,10 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall( const T* const accum_data = tile_size + shared_data; for (int i = thread_idx; i < accum_size; i += block_size) { const int filter_idx = i / kAccumPixels; - const int filter_pix = filter_idx / kBlockSlices; - const int filter_depth = filter_idx % kBlockSlices + start_depth; - const int filter_offset = filter_pix * in_depth + filter_depth; - if (filter_depth < in_depth) { + const int filter_pix = filter_idx / kBlockDepth; + const int filter_channel = filter_idx % kBlockDepth + start_channel; + const int filter_offset = filter_pix * in_depth + filter_channel; + if (filter_channel < in_depth) { T val = accum_data[i]; // Warp-accumulate the pixels of the same depth from the accumulator. val = WarpSumReduce(val); @@ -1204,81 +1247,90 @@ __global__ void __launch_bounds__(640, 2) const T* input, T* filter_backprop, int num_out_backprop) { - const int in_rows = args.in_rows; - const int in_cols = args.in_cols; + const int in_height = args.in_rows; + const int in_width = args.in_cols; const int in_depth = args.in_depth; - const int filter_rows = + const int filter_height = kKnownFilterHeight < 0 ? args.filter_rows : kKnownFilterHeight; - const int filter_cols = + const int filter_width = kKnownFilterWidth < 0 ? args.filter_cols : kKnownFilterWidth; const int depth_multiplier = kKnownDepthMultiplier < 0 ? args.depth_multiplier : kKnownDepthMultiplier; const int stride = args.stride; - const int pad_rows = args.pad_rows; - const int pad_cols = args.pad_cols; - const int out_rows = args.out_rows; - const int out_cols = args.out_cols; + const int pad_height = args.pad_rows; + const int pad_width = args.pad_cols; + const int out_height = args.out_rows; + const int out_width = args.out_cols; const int out_depth = args.out_depth; CUDA_1D_KERNEL_LOOP(thread_id, num_out_backprop) { // Compute the indexes of this thread in the output. - const int out_c = thread_id % out_cols; - const int out_r = (thread_id / out_cols) % out_rows; - const int out_d = (thread_id / out_cols / out_rows) % out_depth; + const int out_col = thread_id % out_width; + const int out_row = (thread_id / out_width) % out_height; + const int out_channel = (thread_id / out_width / out_height) % out_depth; - const int b = thread_id / out_depth / out_cols / out_rows; + const int batch = thread_id / out_depth / out_width / out_height; // Compute the input depth and the index of depth multiplier. - const int in_d = out_d / depth_multiplier; - const int dm = out_d % depth_multiplier; + const int in_channel = out_channel / depth_multiplier; + const int dm = out_channel % depth_multiplier; // Decide if all input is valid, if yes, we can skip the boundary checks // for each input. - const int in_r_start = out_r * stride - pad_rows; - const int in_c_start = out_c * stride - pad_cols; - const int in_r_end = in_r_start + filter_rows; - const int in_c_end = in_c_start + filter_cols; + const int in_row_start = out_row * stride - pad_height; + const int in_col_start = out_col * stride - pad_width; + const int in_row_end = in_row_start + filter_height; + const int in_col_end = in_col_start + filter_width; - const int out_backprop_offset = (b * out_depth * out_rows * out_cols) + - (out_d * out_rows * out_cols) + - (out_r * out_cols) + (out_c); + const int out_backprop_offset = + (batch * out_depth * out_height * out_width) + + (out_channel * out_height * out_width) + (out_row * out_width) + + (out_col); const T out_bp = ldg(out_backprop + out_backprop_offset); - if (in_r_start >= 0 && in_c_start >= 0 && in_r_end < in_rows && - in_c_end < in_cols) { - UNROLL for (int f_r = 0; f_r < filter_rows; ++f_r) { - const int in_r = in_r_start + f_r; + if (in_row_start >= 0 && in_col_start >= 0 && in_row_end < in_height && + in_col_end < in_width) { + UNROLL for (int filter_row = 0; filter_row < filter_height; + ++filter_row) { + const int in_row = in_row_start + filter_row; // Avoid repeated computation. - const int input_offset_temp = (b * in_depth * in_rows * in_cols) + - (in_d * in_rows * in_cols) + - (in_r * in_cols); - - UNROLL for (int f_c = 0; f_c < filter_cols; ++f_c) { - const int in_c = in_c_start + f_c; - const int input_offset = input_offset_temp + in_c; + const int input_offset_temp = + (batch * in_depth * in_height * in_width) + + (in_channel * in_height * in_width) + (in_row * in_width); + + UNROLL for (int filter_col = 0; filter_col < filter_width; + ++filter_col) { + const int in_col = in_col_start + filter_col; + const int input_offset = input_offset_temp + in_col; T partial_sum = ldg(input + input_offset) * out_bp; - T* addr = filter_backprop + - (dm + depth_multiplier * - (in_d + in_depth * (f_c + filter_cols * f_r))); + T* addr = + filter_backprop + + (dm + depth_multiplier * + (in_channel + + in_depth * (filter_col + filter_width * filter_row))); CudaAtomicAdd(addr, partial_sum); } } } else { - UNROLL for (int f_r = 0; f_r < filter_rows; ++f_r) { - const int in_r = in_r_start + f_r; + UNROLL for (int filter_row = 0; filter_row < filter_height; + ++filter_row) { + const int in_row = in_row_start + filter_row; // Avoid repeated computation. - const int input_offset_temp = (b * in_depth * in_rows * in_cols) + - (in_d * in_rows * in_cols) + - (in_r * in_cols); - UNROLL for (int f_c = 0; f_c < filter_cols; ++f_c) { - const int in_c = in_c_start + f_c; - const int addr_temp = filter_cols * f_r; - - if (in_r >= 0 && in_r < in_rows && in_c >= 0 && in_c < in_cols) { - const int input_offset = input_offset_temp + in_c; + const int input_offset_temp = + (batch * in_depth * in_height * in_width) + + (in_channel * in_height * in_width) + (in_row * in_width); + UNROLL for (int filter_col = 0; filter_col < filter_width; + ++filter_col) { + const int in_col = in_col_start + filter_col; + const int addr_temp = filter_width * filter_row; + + if (in_row >= 0 && in_row < in_height && in_col >= 0 && + in_col < in_width) { + const int input_offset = input_offset_temp + in_col; T partial_sum = ldg(input + input_offset) * out_bp; T* addr = filter_backprop + - (dm + depth_multiplier * (in_d + in_depth * (f_c + addr_temp))); + (dm + depth_multiplier * + (in_channel + in_depth * (filter_col + addr_temp))); // Potentially many threads can add to the same address so we have // to use atomic add here. // TODO(jmchen): If atomic add turns out to be slow, we can: @@ -1307,9 +1359,9 @@ __global__ void __launch_bounds__(640, 2) // memory are warp-accumulated (in chunks of kAccumPixels elements) and summed // up in global memory using atomics. // Requirements: threads per block must be multiple of 32 and <= launch_bounds, -// kAccumPixels * 64 >= args.in_rows * args.in_cols * kBlockSlices. +// kAccumPixels * 64 >= args.in_rows * args.in_cols * kBlockDepth. template + int kBlockDepth, int kAccumPixels> __global__ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall( const DepthwiseArgs args, const T* output, const T* input, T* filter) { @@ -1318,39 +1370,39 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall( extern __shared__ __align__(sizeof(T)) unsigned char shared_memory[]; T* const shared_data = reinterpret_cast(shared_memory); - const int batches = args.batch; - const int in_rows = args.in_rows; - const int in_cols = blockDim.x; // slower (see b/62280718): args.in_cols; + const int num_batches = args.batch; + const int in_height = args.in_rows; + const int in_width = blockDim.x; // slower (see b/62280718): args.in_cols; const int in_depth = args.in_depth; - const int filter_rows = + const int filter_height = kKnownFilterHeight < 0 ? args.filter_rows : kKnownFilterHeight; - const int filter_cols = + const int filter_width = kKnownFilterWidth < 0 ? args.filter_cols : kKnownFilterWidth; - const int pad_rows = args.pad_rows; - const int pad_cols = args.pad_cols; + const int pad_height = args.pad_rows; + const int pad_width = args.pad_cols; - const int block_rows = blockDim.y; + const int block_height = blockDim.y; // These values are the same for all threads and could // be precomputed on the CPU. - const int block_pixels = in_cols * block_rows; - const int block_size = block_pixels * kBlockSlices; + const int block_pixels = in_width * block_height; + const int block_size = block_pixels * kBlockDepth; assert((block_size & 31) == 0); - const int in_pixels = in_cols * in_rows; - const int in_increment = in_cols - 1; - const int filter_pixels = filter_rows * filter_cols; - const int tile_cols = in_cols + filter_cols - 1; - const int tile_rows = 2 * block_rows + filter_rows - 1; - const int tile_pixels = tile_cols * tile_rows; - const int tile_size = tile_pixels * kBlockSlices; - const int tile_offset = block_rows * tile_cols; - const int pad_offset = pad_rows * tile_cols + pad_cols; - const int in_slices = in_depth * batches; - const int in_blocks = (in_slices + kBlockSlices - 1) / kBlockSlices; + const int in_pixels = in_width * in_height; + const int in_increment = in_width - 1; + const int filter_pixels = filter_height * filter_width; + const int tile_width = in_width + filter_width - 1; + const int tile_height = 2 * block_height + filter_height - 1; + const int tile_pixels = tile_width * tile_height; + const int tile_size = tile_pixels * kBlockDepth; + const int tile_offset = block_height * tile_width; + const int pad_offset = pad_height * tile_width + pad_width; + const int in_total_depth = in_depth * num_batches; + const int in_blocks = (in_total_depth + kBlockDepth - 1) / kBlockDepth; // The accumulator has a fixed number of pixels that can be reduced by one - // warp. Pixels beyond ceil(in_pixels * kBlockSlices / 64) are never written. - assert(kAccumPixels * 64 >= in_rows * in_cols * kBlockSlices); - const int accum_increment = kAccumPixels * kBlockSlices; + // warp. Pixels beyond ceil(in_pixels * kBlockDepth / 64) are never written. + assert(kAccumPixels * 64 >= in_height * in_width * kBlockDepth); + const int accum_increment = kAccumPixels * kBlockDepth; const int accum_size = filter_pixels * accum_increment; const int thread_col = threadIdx.x; @@ -1358,7 +1410,7 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall( const int thread_depth = threadIdx.z; // Position in block. - const int thread_pix = thread_row * in_cols + thread_col; + const int thread_pix = thread_row * in_width + thread_col; const int thread_idx = thread_depth * block_pixels + thread_pix; // Initialize tile, in particular the padding and accumulator. @@ -1371,27 +1423,27 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall( const int tensor_idx = thread_depth * in_pixels + thread_pix; // Position in (padded) shared memory. - const int data_pix = thread_row * tile_cols + thread_col; + const int data_pix = thread_row * tile_width + thread_col; const int data_idx = thread_depth * tile_pixels + data_pix; - // Position in shared memory, offset by pad_rows / pad_cols. + // Position in shared memory, offset by pad_height / pad_width. const int tile_idx = data_idx + pad_offset; - // Position in accumulator (kBlockSlices per warp, depth major). - const int accum_pix = thread_pix / (32 / kBlockSlices); + // Position in accumulator (kBlockDepth per warp, depth major). + const int accum_pix = thread_pix / (32 / kBlockDepth); const int accum_idx = thread_depth * kAccumPixels + accum_pix; - const int max_slice = in_slices - thread_depth; + const int max_channel = in_total_depth - thread_depth; const int accum_offset = tile_size + accum_idx; - const bool skip_second = block_rows + thread_row >= in_rows; + const bool skip_second = block_height + thread_row >= in_height; for (int b = blockIdx.x; b < in_blocks; b += gridDim.x) { - const int slice = b * kBlockSlices; + const int channel = b * kBlockDepth; - const int inout_offset = slice * in_pixels + tensor_idx; - const bool slice_in_range = slice < max_slice; + const int inout_offset = channel * in_pixels + tensor_idx; + const bool channel_in_range = channel < max_channel; - if (slice_in_range) { + if (channel_in_range) { const T* const in_ptr = inout_offset + input; T* const tile_ptr = tile_idx + shared_data; tile_ptr[0] = ldg(in_ptr); @@ -1402,24 +1454,24 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall( // Note: the condition to reach this is uniform across the entire block. __syncthreads(); - unsigned active_threads = CudaBallotSync(kCudaWarpAll, slice_in_range); + unsigned active_threads = CudaBallotSync(kCudaWarpAll, channel_in_range); - if (slice_in_range) { + if (channel_in_range) { const T* const out_ptr = inout_offset + output; const T out1 = ldg(out_ptr); const T out2 = skip_second ? T(0) : ldg(block_pixels + out_ptr); int shared_offset = data_idx; T* accum_ptr = accum_offset + shared_data; - UNROLL for (int r = 0; r < filter_rows; ++r) { - UNROLL for (int c = 0; c < filter_cols; ++c) { + UNROLL for (int r = 0; r < filter_height; ++r) { + UNROLL for (int c = 0; c < filter_width; ++c) { const T* const tile_ptr = shared_offset + shared_data; T val = out1 * tile_ptr[0] + out2 * tile_ptr[tile_offset]; // Warp-accumulate pixels of the same depth and write to accumulator. - for (int delta = 16 / kBlockSlices; delta > 0; delta /= 2) { + for (int delta = 16 / kBlockDepth; delta > 0; delta /= 2) { val += CudaShuffleXorSync(active_threads, val, delta); } - if (!(thread_idx & 32 / kBlockSlices - 1)) { - *accum_ptr = val; // kBlockSlices threads per warp. + if (!(thread_idx & 32 / kBlockDepth - 1)) { + *accum_ptr = val; // kBlockDepth threads per warp. } ++shared_offset; accum_ptr += accum_increment; @@ -1434,10 +1486,11 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall( const T* const accum_data = tile_size + shared_data; for (int i = thread_idx; i < accum_size; i += block_size) { const int filter_idx = i / kAccumPixels; - const int filter_pix = filter_idx / kBlockSlices; - const int filter_depth = (slice + filter_idx % kBlockSlices) % in_depth; - const int filter_offset = filter_pix * in_depth + filter_depth; - if (filter_depth < in_depth) { + const int filter_pix = filter_idx / kBlockDepth; + const int filter_channel = + (channel + filter_idx % kBlockDepth) % in_depth; + const int filter_offset = filter_pix * in_depth + filter_channel; + if (filter_channel < in_depth) { T val = accum_data[i]; // Warp-accumulate pixels of the same depth from the accumulator. val = WarpSumReduce(val); @@ -1450,31 +1503,31 @@ __launch_bounds__(1024, 2) void DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall( } template + int kBlockDepth, int kAccumPixels> bool TryLaunchDepthwiseConv2dBackpropFilterGPUSmall( - const GpuDevice& d, const DepthwiseArgs& args, const int block_rows, + const GpuDevice& device, const DepthwiseArgs& args, const int block_height, const T* out_backprop, const T* input, T* filter_backprop, TensorFormat data_format) { - const int tile_cols = args.in_cols + args.filter_cols - 1; - const int tile_rows = block_rows * 2 + args.filter_rows - 1; - const int tile_pixels = tile_rows * tile_cols; + const int tile_width = args.in_cols + args.filter_cols - 1; + const int tile_height = block_height * 2 + args.filter_rows - 1; + const int tile_pixels = tile_height * tile_width; const int filter_pixels = args.filter_rows * args.filter_cols; const int shared_memory_size = - kBlockSlices * (tile_pixels + filter_pixels * kAccumPixels) * sizeof(T); - if (shared_memory_size > d.sharedMemPerBlock()) { + kBlockDepth * (tile_pixels + filter_pixels * kAccumPixels) * sizeof(T); + if (shared_memory_size > device.sharedMemPerBlock()) { return false; } dim3 block_dim; void (*kernel)(const DepthwiseArgs, const T*, const T*, T*); if (data_format == FORMAT_NHWC) { - block_dim = dim3(kBlockSlices, args.in_cols, block_rows); + block_dim = dim3(kBlockDepth, args.in_cols, block_height); kernel = DepthwiseConv2dBackpropFilterGPUKernelNHWCSmall< - T, kKnownFilterWidth, kKnownFilterHeight, kBlockSlices, kAccumPixels>; + T, kKnownFilterWidth, kKnownFilterHeight, kBlockDepth, kAccumPixels>; } else if (data_format == FORMAT_NCHW) { - block_dim = dim3(args.in_cols, block_rows, kBlockSlices); + block_dim = dim3(args.in_cols, block_height, kBlockDepth); kernel = DepthwiseConv2dBackpropFilterGPUKernelNCHWSmall< - T, kKnownFilterWidth, kKnownFilterHeight, kBlockSlices, kAccumPixels>; + T, kKnownFilterWidth, kKnownFilterHeight, kBlockDepth, kAccumPixels>; } else { assert(false && "Incorrect data format"); return false; @@ -1482,77 +1535,80 @@ bool TryLaunchDepthwiseConv2dBackpropFilterGPUSmall( const int num_out_backprop = args.batch * args.out_rows * args.out_cols * args.out_depth; CudaLaunchConfig config = - GetCudaLaunchConfig(num_out_backprop, d, kernel, shared_memory_size, + GetCudaLaunchConfig(num_out_backprop, device, kernel, shared_memory_size, block_dim.x * block_dim.y * block_dim.z); - kernel<<>>( - args, out_backprop, input, filter_backprop); + kernel<<>>(args, out_backprop, input, filter_backprop); return true; } template + int kBlockDepth> bool TryLaunchDepthwiseConv2dBackpropFilterGPUSmall( - const GpuDevice& d, const DepthwiseArgs& args, const int block_rows, + const GpuDevice& device, const DepthwiseArgs& args, const int block_height, const T* out_backprop, const T* input, T* filter_backprop, TensorFormat data_format) { // Minimize (power of two) kAccumPixels, while satisfying - // kAccumPixels * 32 >= block_rows * in_cols * kBlockSlices. - const int block_pixels = block_rows * args.in_cols * kBlockSlices; + // kAccumPixels * 32 >= block_height * in_width * kBlockDepth. + const int block_pixels = block_height * args.in_cols * kBlockDepth; if (block_pixels > 512) { return TryLaunchDepthwiseConv2dBackpropFilterGPUSmall< - T, kKnownFilterWidth, kKnownFilterHeight, kBlockSlices, 32>( - d, args, block_rows, out_backprop, input, filter_backprop, data_format); + T, kKnownFilterWidth, kKnownFilterHeight, kBlockDepth, 32>( + device, args, block_height, out_backprop, input, filter_backprop, + data_format); } else if (block_pixels > 256) { return TryLaunchDepthwiseConv2dBackpropFilterGPUSmall< - T, kKnownFilterWidth, kKnownFilterHeight, kBlockSlices, 16>( - d, args, block_rows, out_backprop, input, filter_backprop, data_format); + T, kKnownFilterWidth, kKnownFilterHeight, kBlockDepth, 16>( + device, args, block_height, out_backprop, input, filter_backprop, + data_format); } else { return TryLaunchDepthwiseConv2dBackpropFilterGPUSmall< - T, kKnownFilterWidth, kKnownFilterHeight, kBlockSlices, 8>( - d, args, block_rows, out_backprop, input, filter_backprop, data_format); + T, kKnownFilterWidth, kKnownFilterHeight, kBlockDepth, 8>( + device, args, block_height, out_backprop, input, filter_backprop, + data_format); } } template bool TryLaunchDepthwiseConv2dBackpropFilterGPUSmall( - const GpuDevice& d, const DepthwiseArgs& args, const T* out_backprop, + const GpuDevice& device, const DepthwiseArgs& args, const T* out_backprop, const T* input, T* filter_backprop, TensorFormat data_format) { - // Maximize (power of two) kBlockSlices while keeping a block within 1024 + // Maximize (power of two) kBlockDepth while keeping a block within 1024 // threads (2 pixels per thread). - int block_slices = 8; - int block_rows = (args.in_rows + 1) / 2; + int block_depth = 8; + int block_height = (args.in_rows + 1) / 2; int round_mask = 1; - for (; block_slices > 1; block_slices /= 2) { - // args.in_cols * block_rows * kBlockSlices must be multiple of 32. - for (; block_rows * args.in_cols * block_slices & 31; + for (; block_depth > 1; block_depth /= 2) { + // args.in_cols * block_height * kBlockDepth must be multiple of 32. + for (; block_height * args.in_cols * block_depth & 31; round_mask = round_mask * 2 + 1) { - block_rows = block_rows + round_mask & ~round_mask; + block_height = block_height + round_mask & ~round_mask; } - int block_size = block_rows * args.in_cols * block_slices; + int block_size = block_height * args.in_cols * block_depth; if (block_size <= 1024) { break; } } - if (!CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, block_rows)) { + if (!CanLaunchDepthwiseConv2dBackpropFilterGPUSmall(args, block_height)) { return false; } - switch (block_slices) { + switch (block_depth) { case 8: return TryLaunchDepthwiseConv2dBackpropFilterGPUSmall< T, kKnownFilterWidth, kKnownFilterHeight, 8>( - d, args, block_rows, out_backprop, input, filter_backprop, + device, args, block_height, out_backprop, input, filter_backprop, data_format); case 4: return TryLaunchDepthwiseConv2dBackpropFilterGPUSmall< T, kKnownFilterWidth, kKnownFilterHeight, 4>( - d, args, block_rows, out_backprop, input, filter_backprop, + device, args, block_height, out_backprop, input, filter_backprop, data_format); case 2: return TryLaunchDepthwiseConv2dBackpropFilterGPUSmall< T, kKnownFilterWidth, kKnownFilterHeight, 2>( - d, args, block_rows, out_backprop, input, filter_backprop, + device, args, block_height, out_backprop, input, filter_backprop, data_format); default: return false; @@ -1561,7 +1617,7 @@ bool TryLaunchDepthwiseConv2dBackpropFilterGPUSmall( template -void LaunchDepthwiseConv2dBackpropFilterGPU(const GpuDevice& d, +void LaunchDepthwiseConv2dBackpropFilterGPU(const GpuDevice& device, const DepthwiseArgs& args, const T* out_backprop, const T* input, T* filter_backprop, @@ -1580,13 +1636,13 @@ void LaunchDepthwiseConv2dBackpropFilterGPU(const GpuDevice& d, const int num_out_backprop = args.batch * args.out_rows * args.out_cols * args.out_depth; CudaLaunchConfig config = - GetCudaLaunchConfig(num_out_backprop, d, kernel, 0, 0); - kernel<<>>( + GetCudaLaunchConfig(num_out_backprop, device, kernel, 0, 0); + kernel<<>>( args, out_backprop, input, filter_backprop, num_out_backprop); } template -void LaunchDepthwiseConv2dBackpropFilterGPU(const GpuDevice& d, +void LaunchDepthwiseConv2dBackpropFilterGPU(const GpuDevice& device, const DepthwiseArgs& args, const T* out_backprop, const T* input, T* filter_backprop, @@ -1594,17 +1650,17 @@ void LaunchDepthwiseConv2dBackpropFilterGPU(const GpuDevice& d, if (args.depth_multiplier == 1) { if (TryLaunchDepthwiseConv2dBackpropFilterGPUSmall( - d, args, out_backprop, input, filter_backprop, data_format)) { + device, args, out_backprop, input, filter_backprop, data_format)) { return; } LaunchDepthwiseConv2dBackpropFilterGPU( - d, args, out_backprop, input, filter_backprop, data_format); + device, args, out_backprop, input, filter_backprop, data_format); } else { LaunchDepthwiseConv2dBackpropFilterGPU( - d, args, out_backprop, input, filter_backprop, data_format); + device, args, out_backprop, input, filter_backprop, data_format); } } @@ -1613,7 +1669,7 @@ template void LaunchDepthwiseConvBackpropFilterOp::operator()( OpKernelContext* ctx, const DepthwiseArgs& args, const T* out_backprop, const T* input, T* filter_backprop, TensorFormat data_format) { - const GpuDevice& d = ctx->eigen_device(); + const GpuDevice& device = ctx->eigen_device(); auto stream = ctx->op_device_context()->stream(); // Initialize the results to 0. @@ -1625,10 +1681,10 @@ void LaunchDepthwiseConvBackpropFilterOp::operator()( if (args.filter_rows == 3 && args.filter_cols == 3) { LaunchDepthwiseConv2dBackpropFilterGPU( - d, args, out_backprop, input, filter_backprop, data_format); + device, args, out_backprop, input, filter_backprop, data_format); } else { LaunchDepthwiseConv2dBackpropFilterGPU( - d, args, out_backprop, input, filter_backprop, data_format); + device, args, out_backprop, input, filter_backprop, data_format); } OP_REQUIRES(ctx, stream->ok(), errors::Internal("Launch of gpu kernel for " -- GitLab From ab9f850f6bba604cfad0d4a2bb11e40517030085 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 08:12:09 -0800 Subject: [PATCH 114/629] Force the use of print function in generated code. PiperOrigin-RevId: 185531979 --- tensorflow/contrib/py2tf/impl/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/py2tf/impl/config.py b/tensorflow/contrib/py2tf/impl/config.py index 6525806a09..8b81d09cb4 100644 --- a/tensorflow/contrib/py2tf/impl/config.py +++ b/tensorflow/contrib/py2tf/impl/config.py @@ -35,6 +35,7 @@ NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',)) # TODO(mdan): Verify that these names are not hidden by generated code. # TODO(mdan): Make sure copybara renames the reference below. COMPILED_IMPORT_STATEMENTS = ( + 'from __future__ import print_function', 'import tensorflow as tf', 'from tensorflow.contrib.py2tf import utils as ' 'py2tf_utils') -- GitLab From 1fc821602d69e5812b854a61f09f163ce549641b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 08:46:26 -0800 Subject: [PATCH 115/629] Add gradient norm target arg to wass gradient penalty function. This trick is usd in the progressive GAN paper https://arxiv.org/abs/1710.10196 PiperOrigin-RevId: 185535584 --- .../gan/python/losses/python/losses_impl.py | 5 +++- .../python/losses/python/losses_impl_test.py | 23 +++++++++++++++++++ tensorflow/contrib/gan/python/train.py | 9 +++++++- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl.py b/tensorflow/contrib/gan/python/losses/python/losses_impl.py index 23a3b60cc0..39588b7219 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl.py @@ -305,6 +305,7 @@ def wasserstein_gradient_penalty( discriminator_fn, discriminator_scope, epsilon=1e-10, + target=1.0, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES, @@ -324,6 +325,8 @@ def wasserstein_gradient_penalty( discriminator_scope: If not `None`, reuse discriminators from this scope. epsilon: A small positive number added for numerical stability when computing the gradient norm. + target: Optional Python number or `Tensor` indicating the target value of + gradient norm. Defaults to 1.0. weights: Optional `Tensor` whose rank is either 0, or the same rank as `real_data` and `generated_data`, and must be broadcastable to them (i.e., all dimensions must be either `1`, or the same as the @@ -374,7 +377,7 @@ def wasserstein_gradient_penalty( # For numerical stability, add epsilon to the sum before taking the square # root. Note tf.norm does not add epsilon. slopes = math_ops.sqrt(gradient_squares + epsilon) - penalties = math_ops.square(slopes - 1.0) + penalties = math_ops.square(slopes / target - 1.0) penalty = losses.compute_weighted_loss( penalties, weights, scope=scope, loss_collection=loss_collection, reduction=reduction) diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py index 56ac45554d..dbaa624ae9 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py @@ -481,6 +481,29 @@ class GradientPenaltyTest(test.TestCase, _PenaltyTest): }) self.assertAlmostEqual(self._expected_loss, loss, 5) + def test_loss_with_gradient_norm_target(self): + """Test loss value with non default gradient norm target.""" + generated_data = array_ops.placeholder(dtypes.float32, shape=(None, None)) + real_data = array_ops.placeholder(dtypes.float32, shape=(None, None)) + + loss = tfgan_losses.wasserstein_gradient_penalty( + generated_data, + real_data, + self._kwargs['generator_inputs'], + self._kwargs['discriminator_fn'], + self._kwargs['discriminator_scope'], + target=2.0) + + with self.test_session() as sess: + variables.global_variables_initializer().run() + loss = sess.run( + loss, + feed_dict={ + generated_data: self._generated_data_np, + real_data: self._real_data_np, + }) + self.assertAlmostEqual(1.0, loss, 5) + def test_reuses_scope(self): """Test that gradient penalty reuses discriminator scope.""" num_vars = len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) diff --git a/tensorflow/contrib/gan/python/train.py b/tensorflow/contrib/gan/python/train.py index 5d0ac93aec..776eb11ecb 100644 --- a/tensorflow/contrib/gan/python/train.py +++ b/tensorflow/contrib/gan/python/train.py @@ -460,6 +460,7 @@ def gan_loss( # Auxiliary losses. gradient_penalty_weight=None, gradient_penalty_epsilon=1e-10, + gradient_penalty_target=1.0, mutual_information_penalty_weight=None, aux_cond_generator_weight=None, aux_cond_discriminator_weight=None, @@ -481,6 +482,9 @@ def gan_loss( small positive value used by the gradient penalty function for numerical stability. Note some applications will need to increase this value to avoid NaNs. + gradient_penalty_target: If `gradient_penalty_weight` is not None, a Python + number or `Tensor` indicating the target value of gradient norm. See the + CIFAR10 section of https://arxiv.org/abs/1710.10196. Defaults to 1.0. mutual_information_penalty_weight: If not `None`, must be a non-negative Python number or Tensor indicating how much to weight the mutual information penalty. See https://arxiv.org/abs/1606.03657 for more @@ -539,7 +543,10 @@ def gan_loss( # Add optional extra losses. if _use_aux_loss(gradient_penalty_weight): gp_loss = tfgan_losses.wasserstein_gradient_penalty( - model, epsilon=gradient_penalty_epsilon, add_summaries=add_summaries) + model, + epsilon=gradient_penalty_epsilon, + target=gradient_penalty_target, + add_summaries=add_summaries) dis_loss += gradient_penalty_weight * gp_loss if _use_aux_loss(mutual_information_penalty_weight): info_loss = tfgan_losses.mutual_information_penalty( -- GitLab From 903df63d5556f816685ce6b75c2216fc760d6b47 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 09:07:12 -0800 Subject: [PATCH 116/629] TF to XLA compiler to support FakeQuantWithMinMaxVars/Args. PiperOrigin-RevId: 185538228 --- tensorflow/compiler/tests/BUILD | 11 + .../compiler/tests/fake_quant_ops_test.py | 452 ++++++++++++++++++ tensorflow/compiler/tf2xla/kernels/BUILD | 1 + .../tf2xla/kernels/fake_quantize_ops.cc | 289 +++++++++++ 4 files changed, 753 insertions(+) create mode 100644 tensorflow/compiler/tests/fake_quant_ops_test.py create mode 100644 tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index acd9cf7bee..b49d2ca961 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -815,6 +815,17 @@ tf_library( tfcompile_flags = ["--xla_cpu_multi_thread_eigen=false"], ) +tf_xla_py_test( + name = "fake_quant_ops_test", + size = "medium", + srcs = ["fake_quant_ops_test.py"], + deps = [ + ":xla_test", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:platform_test", + ], +) + # ----------------------------------------------------------------------------- filegroup( diff --git a/tensorflow/compiler/tests/fake_quant_ops_test.py b/tensorflow/compiler/tests/fake_quant_ops_test.py new file mode 100644 index 0000000000..dfe9400ef0 --- /dev/null +++ b/tensorflow/compiler/tests/fake_quant_ops_test.py @@ -0,0 +1,452 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from tensorflow.compiler.tests.xla_test import XLATestCase +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops +from tensorflow.python.platform import googletest + + +class FakeQuantWithMinMaxArgsTest(XLATestCase): + """Test cases for FakeQuantWithMinMaxArgs operation.""" + + # 8 bits, wide range. + def testOp_with8BitsNoScalingNoNudging(self): + self._TestOp(0.0, 255.0, 8, False, 0.0, 255.0, 1.0) + + def testOp_with8BitsScalingAndNudgingDown(self): + self._TestOp(0.5, 128.0, 8, False, 0.0, 127.5, 0.5) + + def testOp_with8BitsScalingAndNudgingUp(self): + self._TestOp(-128.0, -0.5, 8, False, -127.5, 0.0, 0.5) + + def testOp_with8BitsScalingAndNudgingBetween(self): + self._TestOp(-0.1, 127.4, 8, False, 0.0, 127.5, 0.5) + + # 8 bits, narrow range. + def testOp_with8BitsNarrowRangeNoScalingNoNudging(self): + self._TestOp(0.0, 254.0, 8, True, 0.0, 254.0, 1.0) + + def testOp_with8BitsNarrowRangeScalingAndNudgingDown(self): + self._TestOp(0.1, 127.1, 8, True, 0.0, 127.0, 0.5) + + def testOp_with8BitsNarrowRangeScalingAndNudgingUp(self): + self._TestOp(-127.1, -0.1, 8, True, -127.0, 0.0, 0.5) + + def testOp_with8BitsNarrowRangeScalingAndNudgingBetween(self): + self._TestOp(-0.1, 126.9, 8, True, 0.0, 127.0, 0.5) + + # 7 bits, wide range. + def testOp_with7BitsNoScalingNoNudging(self): + self._TestOp(0.0, 127.0, 7, False, 0.0, 127.0, 1.0) + + def testOp_with7BitsScalingAndNudgingDown(self): + self._TestOp(0.5, 64.0, 7, False, 0.0, 63.5, 0.5) + + def testOp_with7BitsScalingAndNudgingUp(self): + self._TestOp(-64.0, -0.5, 7, False, -63.5, 0.0, 0.5) + + def testOp_with7BitsScalingAndNudgingBetween(self): + self._TestOp(-0.1, 63.4, 7, False, 0.0, 63.5, 0.5) + + # 7 bits, narrow range. + def testOp_with7BitsNarrowRangeNoScalingNoNudging(self): + self._TestOp(0.0, 126.0, 7, True, 0.0, 126.0, 1.0) + + def testOp_with7BitsNarrowRangeScalingAndNudgingDown(self): + self._TestOp(0.1, 63.1, 7, True, 0.0, 63.0, 0.5) + + def testOp_with7BitsNarrowRangeScalingAndNudgingUp(self): + self._TestOp(-63.1, -0.1, 7, True, -63.0, 0.0, 0.5) + + def testOp_with7BitsNarrowRangeScalingAndNudgingBetween(self): + self._TestOp(-0.1, 62.9, 7, True, 0.0, 63.0, 0.5) + + def _TestOp(self, input_min, input_max, num_bits, narrow_range, + expected_nudged_input_min, expected_nudged_input_max, + expected_step): + inputs = np.array( + [ + expected_nudged_input_min - expected_step, + expected_nudged_input_min - 0.01, expected_nudged_input_min, + expected_nudged_input_min + 0.01, + expected_nudged_input_min + expected_step - 0.01, + expected_nudged_input_min + expected_step, + expected_nudged_input_min + expected_step + 0.01, + expected_nudged_input_max - 0.01, expected_nudged_input_max, + expected_nudged_input_max + 0.01, + expected_nudged_input_max + expected_step + ], + dtype=np.float32) + expected = np.array( + [ + expected_nudged_input_min, expected_nudged_input_min, + expected_nudged_input_min, expected_nudged_input_min, + expected_nudged_input_min + expected_step, + expected_nudged_input_min + expected_step, + expected_nudged_input_min + expected_step, + expected_nudged_input_max, expected_nudged_input_max, + expected_nudged_input_max, expected_nudged_input_max + ], + dtype=np.float32) + + with self.test_session() as session: + with self.test_scope(): + input_placeholder = array_ops.placeholder( + dtypes.float32, inputs.shape, name="inputs") + outputs = array_ops.fake_quant_with_min_max_args( + input_placeholder, + min=input_min, + max=input_max, + num_bits=num_bits, + narrow_range=narrow_range) + result = session.run(outputs, {input_placeholder: inputs}) + self.assertAllCloseAccordingToType( + result, expected, rtol=1e-3, atol=1e-5, bfloat16_rtol=0.03) + + +class FakeQuantWithMinMaxArgsGradientTest(XLATestCase): + """Test cases for FakeQuantWithMinMaxArgsGradient operation.""" + + # 8 bits, wide range. + def testOp_with8BitsNoScalingNoNudging(self): + self._TestOp(0.0, 255.0, 8, False, 0.0, 255.0, 1.0) + + def testOp_with8BitsScalingAndNudgingDown(self): + self._TestOp(0.5, 128.0, 8, False, 0.0, 127.5, 0.5) + + def testOp_with8BitsScalingAndNudgingUp(self): + self._TestOp(-128.0, -0.5, 8, False, -127.5, 0.0, 0.5) + + def testOp_with8BitsScalingAndNudgingBetween(self): + self._TestOp(-0.1, 127.4, 8, False, 0.0, 127.5, 0.5) + + # 8 bits, narrow range. + def testOp_with8BitsNarrowRangeNoScalingNoNudging(self): + self._TestOp(0.0, 254.0, 8, True, 0.0, 254.0, 1.0) + + def testOp_with8BitsNarrowRangeScalingAndNudgingDown(self): + self._TestOp(0.1, 127.1, 8, True, 0.0, 127.0, 0.5) + + def testOp_with8BitsNarrowRangeScalingAndNudgingUp(self): + self._TestOp(-127.1, -0.1, 8, True, -127.0, 0.0, 0.5) + + def testOp_with8BitsNarrowRangeScalingAndNudgingBetween(self): + self._TestOp(-0.1, 126.9, 8, True, 0.0, 127.0, 0.5) + + # 7 bits, wide range. + def testOp_with7BitsNoScalingNoNudging(self): + self._TestOp(0.0, 127.0, 7, False, 0.0, 127.0, 1.0) + + def testOp_with7BitsScalingAndNudgingDown(self): + self._TestOp(0.5, 64.0, 7, False, 0.0, 63.5, 0.5) + + def testOp_with7BitsScalingAndNudgingUp(self): + self._TestOp(-64.0, -0.5, 7, False, -63.5, 0.0, 0.5) + + def testOp_with7BitsScalingAndNudgingBetween(self): + self._TestOp(-0.1, 63.4, 7, False, 0.0, 63.5, 0.5) + + # 7 bits, narrow range. + def testOp_with7BitsNarrowRangeNoScalingNoNudging(self): + self._TestOp(0.0, 126.0, 7, True, 0.0, 126.0, 1.0) + + def testOp_with7BitsNarrowRangeScalingAndNudgingDown(self): + self._TestOp(0.1, 63.1, 7, True, 0.0, 63.0, 0.5) + + def testOp_with7BitsNarrowRangeScalingAndNudgingUp(self): + self._TestOp(-63.1, -0.1, 7, True, -63.0, 0.0, 0.5) + + def testOp_with7BitsNarrowRangeScalingAndNudgingBetween(self): + self._TestOp(-0.1, 62.9, 7, True, 0.0, 63.0, 0.5) + + def _TestOp(self, input_min, input_max, num_bits, narrow_range, + expected_nudged_input_min, expected_nudged_input_max, + expected_step): + inputs = np.array( + [ + expected_nudged_input_min - expected_step, + expected_nudged_input_min - 0.01, expected_nudged_input_min, + expected_nudged_input_min + 0.01, + expected_nudged_input_min + expected_step - 0.01, + expected_nudged_input_min + expected_step, + expected_nudged_input_min + expected_step + 0.01, + expected_nudged_input_max - 0.01, expected_nudged_input_max, + expected_nudged_input_max + 0.01, + expected_nudged_input_max + expected_step + ], + dtype=np.float32) + gradients = np.arange(1, len(inputs) + 1, dtype=np.float32) + expected_backprops = np.array( + [0.0, 0.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0, 0.0], + dtype=np.float32) + + with self.test_session() as session: + with self.test_scope(): + gradient_placeholder = array_ops.placeholder( + dtypes.float32, gradients.shape, name="gradients") + input_placeholder = array_ops.placeholder( + dtypes.float32, inputs.shape, name="inputs") + outputs = gen_array_ops.fake_quant_with_min_max_args_gradient( + gradient_placeholder, + input_placeholder, + min=input_min, + max=input_max, + num_bits=num_bits, + narrow_range=narrow_range) + backprops = session.run(outputs, { + gradient_placeholder: gradients, + input_placeholder: inputs + }) + self.assertAllCloseAccordingToType( + backprops, + expected_backprops, + rtol=1e-3, + atol=1e-5, + bfloat16_rtol=0.03) + + +class FakeQuantWithMinMaxVarsTest(XLATestCase): + """Test cases for FakeQuantWithMinMaxVars operation.""" + + # 8 bits, wide range. + def testOp_with8BitsNoScalingNoNudging(self): + self._TestOp(0.0, 255.0, 8, False, 0.0, 255.0, 1.0) + + def testOp_with8BitsScalingAndNudgingDown(self): + self._TestOp(0.5, 128.0, 8, False, 0.0, 127.5, 0.5) + + def testOp_with8BitsScalingAndNudgingUp(self): + self._TestOp(-128.0, -0.5, 8, False, -127.5, 0.0, 0.5) + + def testOp_with8BitsScalingAndNudgingBetween(self): + self._TestOp(-0.1, 127.4, 8, False, 0.0, 127.5, 0.5) + + # 8 bits, narrow range. + def testOp_with8BitsNarrowRangeNoScalingNoNudging(self): + self._TestOp(0.0, 254.0, 8, True, 0.0, 254.0, 1.0) + + def testOp_with8BitsNarrowRangeScalingAndNudgingDown(self): + self._TestOp(0.1, 127.1, 8, True, 0.0, 127.0, 0.5) + + def testOp_with8BitsNarrowRangeScalingAndNudgingUp(self): + self._TestOp(-127.1, -0.1, 8, True, -127.0, 0.0, 0.5) + + def testOp_with8BitsNarrowRangeScalingAndNudgingBetween(self): + self._TestOp(-0.1, 126.9, 8, True, 0.0, 127.0, 0.5) + + # 7 bits, wide range. + def testOp_with7BitsNoScalingNoNudging(self): + self._TestOp(0.0, 127.0, 7, False, 0.0, 127.0, 1.0) + + def testOp_with7BitsScalingAndNudgingDown(self): + self._TestOp(0.5, 64.0, 7, False, 0.0, 63.5, 0.5) + + def testOp_with7BitsScalingAndNudgingUp(self): + self._TestOp(-64.0, -0.5, 7, False, -63.5, 0.0, 0.5) + + def testOp_with7BitsScalingAndNudgingBetween(self): + self._TestOp(-0.1, 63.4, 7, False, 0.0, 63.5, 0.5) + + # 7 bits, narrow range. + def testOp_with7BitsNarrowRangeNoScalingNoNudging(self): + self._TestOp(0.0, 126.0, 7, True, 0.0, 126.0, 1.0) + + def testOp_with7BitsNarrowRangeScalingAndNudgingDown(self): + self._TestOp(0.1, 63.1, 7, True, 0.0, 63.0, 0.5) + + def testOp_with7BitsNarrowRangeScalingAndNudgingUp(self): + self._TestOp(-63.1, -0.1, 7, True, -63.0, 0.0, 0.5) + + def testOp_with7BitsNarrowRangeScalingAndNudgingBetween(self): + self._TestOp(-0.1, 62.9, 7, True, 0.0, 63.0, 0.5) + + def _TestOp(self, input_min, input_max, num_bits, narrow_range, + expected_nudged_input_min, expected_nudged_input_max, + expected_step): + inputs = np.array( + [ + expected_nudged_input_min - expected_step, + expected_nudged_input_min - 0.01, expected_nudged_input_min, + expected_nudged_input_min + 0.01, + expected_nudged_input_min + expected_step - 0.01, + expected_nudged_input_min + expected_step, + expected_nudged_input_min + expected_step + 0.01, + expected_nudged_input_max - 0.01, expected_nudged_input_max, + expected_nudged_input_max + 0.01, + expected_nudged_input_max + expected_step + ], + dtype=np.float32) + expected = np.array( + [ + expected_nudged_input_min, expected_nudged_input_min, + expected_nudged_input_min, expected_nudged_input_min, + expected_nudged_input_min + expected_step, + expected_nudged_input_min + expected_step, + expected_nudged_input_min + expected_step, + expected_nudged_input_max, expected_nudged_input_max, + expected_nudged_input_max, expected_nudged_input_max + ], + dtype=np.float32) + + with self.test_session() as session: + with self.test_scope(): + input_placeholder = array_ops.placeholder( + dtypes.float32, inputs.shape, name="inputs") + min_placeholder = array_ops.placeholder(dtypes.float32, (), name="min") + max_placeholder = array_ops.placeholder(dtypes.float32, (), name="max") + outputs = array_ops.fake_quant_with_min_max_vars( + input_placeholder, + min_placeholder, + max_placeholder, + num_bits=num_bits, + narrow_range=narrow_range) + result = session.run( + outputs, { + input_placeholder: inputs, + min_placeholder: input_min, + max_placeholder: input_max + }) + self.assertAllCloseAccordingToType( + result, expected, rtol=1e-3, atol=1e-5, bfloat16_rtol=0.03) + + +class FakeQuantWithMinMaxVarsGradientTest(XLATestCase): + """Test cases for FakeQuantWithMinMaxVarsGradient operation.""" + + # 8 bits, wide range. + def testOp_with8BitsNoScalingNoNudging(self): + self._TestOp(0.0, 255.0, 8, False, 0.0, 255.0, 1.0) + + def testOp_with8BitsScalingAndNudgingDown(self): + self._TestOp(0.5, 128.0, 8, False, 0.0, 127.5, 0.5) + + def testOp_with8BitsScalingAndNudgingUp(self): + self._TestOp(-128.0, -0.5, 8, False, -127.5, 0.0, 0.5) + + def testOp_with8BitsScalingAndNudgingBetween(self): + self._TestOp(-0.1, 127.4, 8, False, 0.0, 127.5, 0.5) + + # 8 bits, narrow range. + def testOp_with8BitsNarrowRangeNoScalingNoNudging(self): + self._TestOp(0.0, 254.0, 8, True, 0.0, 254.0, 1.0) + + def testOp_with8BitsNarrowRangeScalingAndNudgingDown(self): + self._TestOp(0.1, 127.1, 8, True, 0.0, 127.0, 0.5) + + def testOp_with8BitsNarrowRangeScalingAndNudgingUp(self): + self._TestOp(-127.1, -0.1, 8, True, -127.0, 0.0, 0.5) + + def testOp_with8BitsNarrowRangeScalingAndNudgingBetween(self): + self._TestOp(-0.1, 126.9, 8, True, 0.0, 127.0, 0.5) + + # 7 bits, wide range. + def testOp_with7BitsNoScalingNoNudging(self): + self._TestOp(0.0, 127.0, 7, False, 0.0, 127.0, 1.0) + + def testOp_with7BitsScalingAndNudgingDown(self): + self._TestOp(0.5, 64.0, 7, False, 0.0, 63.5, 0.5) + + def testOp_with7BitsScalingAndNudgingUp(self): + self._TestOp(-64.0, -0.5, 7, False, -63.5, 0.0, 0.5) + + def testOp_with7BitsScalingAndNudgingBetween(self): + self._TestOp(-0.1, 63.4, 7, False, 0.0, 63.5, 0.5) + + # 7 bits, narrow range. + def testOp_with7BitsNarrowRangeNoScalingNoNudging(self): + self._TestOp(0.0, 126.0, 7, True, 0.0, 126.0, 1.0) + + def testOp_with7BitsNarrowRangeScalingAndNudgingDown(self): + self._TestOp(0.1, 63.1, 7, True, 0.0, 63.0, 0.5) + + def testOp_with7BitsNarrowRangeScalingAndNudgingUp(self): + self._TestOp(-63.1, -0.1, 7, True, -63.0, 0.0, 0.5) + + def testOp_with7BitsNarrowRangeScalingAndNudgingBetween(self): + self._TestOp(-0.1, 62.9, 7, True, 0.0, 63.0, 0.5) + + def _TestOp(self, input_min, input_max, num_bits, narrow_range, + expected_nudged_input_min, expected_nudged_input_max, + expected_step): + inputs = np.array( + [ + expected_nudged_input_min - expected_step, + expected_nudged_input_min - 0.01, expected_nudged_input_min, + expected_nudged_input_min + 0.01, + expected_nudged_input_min + expected_step - 0.01, + expected_nudged_input_min + expected_step, + expected_nudged_input_min + expected_step + 0.01, + expected_nudged_input_max - 0.01, expected_nudged_input_max, + expected_nudged_input_max + 0.01, + expected_nudged_input_max + expected_step + ], + dtype=np.float32) + gradients = np.arange(1, len(inputs) + 1, dtype=np.float32) + expected_backprops_wrt_input = np.array( + [0.0, 0.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0, 0.0], + dtype=np.float32) + expected_backprops_wrt_min = 1.0 + 2.0 + expected_backprops_wrt_max = 10.0 + 11.0 + + with self.test_session() as session: + with self.test_scope(): + gradient_placeholder = array_ops.placeholder( + dtypes.float32, gradients.shape, name="gradients") + input_placeholder = array_ops.placeholder( + dtypes.float32, inputs.shape, name="inputs") + min_placeholder = array_ops.placeholder(dtypes.float32, (), name="min") + max_placeholder = array_ops.placeholder(dtypes.float32, (), name="max") + outputs = array_ops.fake_quant_with_min_max_vars_gradient( + gradient_placeholder, + input_placeholder, + min_placeholder, + max_placeholder, + num_bits=num_bits, + narrow_range=narrow_range) + backprops_wrt_input, backprops_wrt_min, backprops_wrt_max = session.run( + outputs, { + gradient_placeholder: gradients, + input_placeholder: inputs, + min_placeholder: input_min, + max_placeholder: input_max + }) + self.assertAllCloseAccordingToType( + backprops_wrt_input, + expected_backprops_wrt_input, + rtol=1e-3, + atol=1e-5, + bfloat16_rtol=0.03) + self.assertAllCloseAccordingToType( + backprops_wrt_min, + expected_backprops_wrt_min, + rtol=1e-3, + atol=1e-5, + bfloat16_rtol=0.03) + self.assertAllCloseAccordingToType( + backprops_wrt_max, + expected_backprops_wrt_max, + rtol=1e-3, + atol=1e-5, + bfloat16_rtol=0.03) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index d83d576eda..d2fa933cf9 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -32,6 +32,7 @@ tf_kernel_library( "dynamic_stitch_op.cc", "elu_op.cc", "extract_image_patches_op.cc", + "fake_quantize_ops.cc", "fft_ops.cc", "fill_op.cc", "function_ops.cc", diff --git a/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc b/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc new file mode 100644 index 0000000000..453a32c494 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/fake_quantize_ops.cc @@ -0,0 +1,289 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/xla_helpers.h" +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/core/platform/macros.h" + +namespace tensorflow { +namespace { + +// Gymnastics with nudged zero point is to ensure that the real zero maps to +// an integer, which is required for e.g. zero-padding in convolutional layers. +void CpuNudge(const float min, const float max, const float quant_min, + const float quant_max, float* nudged_min, float* nudged_max, + float* scale) { + *scale = (max - min) / (quant_max - quant_min); + + const float zero_point_from_min = quant_min - min / *scale; + float nudged_zero_point; + if (zero_point_from_min <= quant_min) { + nudged_zero_point = quant_min; + } else if (zero_point_from_min >= quant_max) { + nudged_zero_point = quant_max; + } else { + nudged_zero_point = std::round(zero_point_from_min); + } + + *nudged_min = (quant_min - nudged_zero_point) * (*scale); + *nudged_max = (quant_max - nudged_zero_point) * (*scale); +} + +// An XLA version of CpuNudge(). +void XlaNudge(xla::ComputationBuilder* b, const DataType data_type, + const xla::ComputationDataHandle& min, + const xla::ComputationDataHandle& max, + const float quant_min_value, const float quant_max_value, + xla::ComputationDataHandle* nudged_min, + xla::ComputationDataHandle* nudged_max, + xla::ComputationDataHandle* scale) { + *scale = b->Div(b->Sub(max, min), + XlaHelpers::FloatLiteral(b, data_type, + quant_max_value - quant_min_value)); + xla::ComputationDataHandle quant_min = + XlaHelpers::FloatLiteral(b, data_type, quant_min_value); + xla::ComputationDataHandle zero_point_from_min = + b->Sub(quant_min, b->Div(min, *scale)); + xla::ComputationDataHandle quant_max = + XlaHelpers::FloatLiteral(b, data_type, quant_max_value); + xla::ComputationDataHandle nudged_zero_point = + b->Select(b->Le(zero_point_from_min, quant_min), quant_min, + b->Select(b->Ge(zero_point_from_min, quant_max), quant_max, + b->Round(zero_point_from_min))); + *nudged_min = b->Mul(b->Sub(quant_min, nudged_zero_point), *scale); + *nudged_max = b->Mul(b->Sub(quant_max, nudged_zero_point), *scale); +} + +xla::ComputationDataHandle Quantize( + xla::ComputationBuilder* b, const xla::ComputationDataHandle& input, + const DataType data_type, + const xla::ComputationDataHandle& nudged_input_min, + const xla::ComputationDataHandle& nudged_input_max, + const xla::ComputationDataHandle& input_scale) { + xla::ComputationDataHandle one = XlaHelpers::FloatLiteral(b, data_type, 1.0f); + xla::ComputationDataHandle inv_scale = b->Div(one, input_scale); + xla::ComputationDataHandle half = + XlaHelpers::FloatLiteral(b, data_type, 0.5f); + + xla::ComputationDataHandle clamped = + b->Clamp(nudged_input_min, input, nudged_input_max); + xla::ComputationDataHandle clamped_shifted = + b->Sub(clamped, nudged_input_min); + xla::ComputationDataHandle rounded = + b->Floor(b->Add(b->Mul(clamped_shifted, inv_scale), half)); + return b->Add(b->Mul(rounded, input_scale), nudged_input_min); +} + +class FakeQuantWithMinMaxArgsOp : public XlaOpKernel { + public: + explicit FakeQuantWithMinMaxArgsOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx) { + int num_bits; + OP_REQUIRES_OK(ctx, ctx->GetAttr("num_bits", &num_bits)); + OP_REQUIRES(ctx, num_bits >= 2 && num_bits <= 16, + errors::InvalidArgument("num_bits is out of range, expected " + "between 2 and 16, was: ", + num_bits)); + bool narrow_range; + OP_REQUIRES_OK(ctx, ctx->GetAttr("narrow_range", &narrow_range)); + quant_min_ = narrow_range ? 1 : 0; + quant_max_ = (1 << num_bits) - 1; + + float input_min, input_max; + OP_REQUIRES_OK(ctx, ctx->GetAttr("min", &input_min)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("max", &input_max)); + CpuNudge(input_min, input_max, quant_min_, quant_max_, &nudged_input_min_, + &nudged_input_max_, &input_scale_); + } + + void Compile(XlaOpKernelContext* ctx) override { + xla::ComputationDataHandle input = ctx->Input(0); + const DataType data_type = ctx->input_type(0); + + xla::ComputationBuilder* b = ctx->builder(); + xla::ComputationDataHandle nudged_input_min = + XlaHelpers::FloatLiteral(b, data_type, nudged_input_min_); + xla::ComputationDataHandle nudged_input_max = + XlaHelpers::FloatLiteral(b, data_type, nudged_input_max_); + xla::ComputationDataHandle input_scale = + XlaHelpers::FloatLiteral(b, data_type, input_scale_); + xla::ComputationDataHandle output = Quantize( + b, input, data_type, nudged_input_min, nudged_input_max, input_scale); + ctx->SetOutput(0, output); + } + + private: + float quant_min_; + float quant_max_; + float nudged_input_min_; + float nudged_input_max_; + float input_scale_; +}; + +REGISTER_XLA_OP(Name("FakeQuantWithMinMaxArgs"), FakeQuantWithMinMaxArgsOp); + +class FakeQuantWithMinMaxArgsGradOp : public XlaOpKernel { + public: + explicit FakeQuantWithMinMaxArgsGradOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx) { + int num_bits; + OP_REQUIRES_OK(ctx, ctx->GetAttr("num_bits", &num_bits)); + OP_REQUIRES(ctx, num_bits >= 2 && num_bits <= 16, + errors::InvalidArgument("num_bits is out of range, expected " + "between 2 and 16, was: ", + num_bits)); + bool narrow_range; + OP_REQUIRES_OK(ctx, ctx->GetAttr("narrow_range", &narrow_range)); + const float quant_min = narrow_range ? 1 : 0; + const float quant_max = (1 << num_bits) - 1; + + float input_min, input_max, scale; + OP_REQUIRES_OK(ctx, ctx->GetAttr("min", &input_min)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("max", &input_max)); + CpuNudge(input_min, input_max, quant_min, quant_max, &nudged_input_min_, + &nudged_input_max_, &scale); + } + + void Compile(XlaOpKernelContext* ctx) override { + xla::ComputationDataHandle gradient = ctx->Input(0); + const TensorShape gradient_shape = ctx->InputShape(0); + xla::ComputationDataHandle input = ctx->Input(1); + const DataType data_type = ctx->input_type(1); + + xla::ComputationBuilder* b = ctx->builder(); + xla::ComputationDataHandle nudged_input_min = + XlaHelpers::FloatLiteral(b, data_type, nudged_input_min_); + xla::ComputationDataHandle nudged_input_max = + XlaHelpers::FloatLiteral(b, data_type, nudged_input_max_); + + xla::ComputationDataHandle between_nudged_min_max = + b->And(b->Le(nudged_input_min, input), b->Le(input, nudged_input_max)); + xla::ComputationDataHandle zeroes = b->Broadcast( + XlaHelpers::Zero(b, data_type), gradient_shape.dim_sizes()); + xla::ComputationDataHandle output = + b->Select(between_nudged_min_max, gradient, zeroes); + ctx->SetOutput(0, output); + } + + private: + float nudged_input_min_; + float nudged_input_max_; +}; + +REGISTER_XLA_OP(Name("FakeQuantWithMinMaxArgsGradient"), + FakeQuantWithMinMaxArgsGradOp); + +class FakeQuantWithMinMaxVarsOp : public XlaOpKernel { + public: + explicit FakeQuantWithMinMaxVarsOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx) { + int num_bits; + OP_REQUIRES_OK(ctx, ctx->GetAttr("num_bits", &num_bits)); + OP_REQUIRES(ctx, num_bits >= 2 && num_bits <= 16, + errors::InvalidArgument("num_bits is out of range, expected " + "between 2 and 16, was: ", + num_bits)); + bool narrow_range; + OP_REQUIRES_OK(ctx, ctx->GetAttr("narrow_range", &narrow_range)); + quant_min_ = narrow_range ? 1 : 0; + quant_max_ = (1 << num_bits) - 1; + } + + void Compile(XlaOpKernelContext* ctx) override { + xla::ComputationDataHandle input = ctx->Input(0); + const DataType data_type = ctx->input_type(0); + xla::ComputationDataHandle input_min = ctx->Input(1); + xla::ComputationDataHandle input_max = ctx->Input(2); + + xla::ComputationBuilder* b = ctx->builder(); + xla::ComputationDataHandle nudged_input_min, nudged_input_max, input_scale; + XlaNudge(b, data_type, input_min, input_max, quant_min_, quant_max_, + &nudged_input_min, &nudged_input_max, &input_scale); + + xla::ComputationDataHandle output = Quantize( + b, input, data_type, nudged_input_min, nudged_input_max, input_scale); + ctx->SetOutput(0, output); + } + + private: + float quant_min_; + float quant_max_; +}; + +REGISTER_XLA_OP(Name("FakeQuantWithMinMaxVars"), FakeQuantWithMinMaxVarsOp); + +class FakeQuantWithMinMaxVarsGradOp : public XlaOpKernel { + public: + explicit FakeQuantWithMinMaxVarsGradOp(OpKernelConstruction* ctx) + : XlaOpKernel(ctx) { + int num_bits; + OP_REQUIRES_OK(ctx, ctx->GetAttr("num_bits", &num_bits)); + OP_REQUIRES(ctx, num_bits >= 2 && num_bits <= 16, + errors::InvalidArgument("num_bits is out of range, expected " + "between 2 and 16, was: ", + num_bits)); + bool narrow_range; + OP_REQUIRES_OK(ctx, ctx->GetAttr("narrow_range", &narrow_range)); + quant_min_ = narrow_range ? 1 : 0; + quant_max_ = (1 << num_bits) - 1; + } + + void Compile(XlaOpKernelContext* ctx) override { + xla::ComputationDataHandle gradient = ctx->Input(0); + const TensorShape gradient_shape = ctx->InputShape(0); + xla::ComputationDataHandle input = ctx->Input(1); + const DataType data_type = ctx->input_type(1); + xla::ComputationDataHandle input_min = ctx->Input(2); + xla::ComputationDataHandle input_max = ctx->Input(3); + + xla::ComputationBuilder* b = ctx->builder(); + xla::ComputationDataHandle nudged_input_min, nudged_input_max, input_scale; + XlaNudge(b, data_type, input_min, input_max, quant_min_, quant_max_, + &nudged_input_min, &nudged_input_max, &input_scale); + + xla::ComputationDataHandle between_nudged_min_max = + b->And(b->Le(nudged_input_min, input), b->Le(input, nudged_input_max)); + xla::ComputationDataHandle zero = XlaHelpers::Zero(b, data_type); + xla::ComputationDataHandle zeroes = + b->Broadcast(zero, gradient_shape.dim_sizes()); + xla::ComputationDataHandle output0 = + b->Select(between_nudged_min_max, gradient, zeroes); + ctx->SetOutput(0, output0); + + xla::ComputationDataHandle below_min = b->Lt(input, nudged_input_min); + xla::ComputationDataHandle output1 = + b->ReduceAll(b->Select(below_min, gradient, zeroes), zero, + *ctx->GetOrCreateAdd(data_type)); + ctx->SetOutput(1, output1); + + xla::ComputationDataHandle above_max = b->Gt(input, nudged_input_max); + xla::ComputationDataHandle output2 = + b->ReduceAll(b->Select(above_max, gradient, zeroes), zero, + *ctx->GetOrCreateAdd(data_type)); + ctx->SetOutput(2, output2); + } + + private: + float quant_min_; + float quant_max_; +}; + +REGISTER_XLA_OP(Name("FakeQuantWithMinMaxVarsGradient"), + FakeQuantWithMinMaxVarsGradOp); + +} // namespace +} // namespace tensorflow -- GitLab From 7ed63563c853ac77f9439a1d367add2ec873bea7 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 13 Feb 2018 09:46:36 -0800 Subject: [PATCH 117/629] Tiny bugfix to eager TensorArray error message. PiperOrigin-RevId: 185543699 --- tensorflow/python/ops/tensor_array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py index 5cdf03509e..3c08870146 100644 --- a/tensorflow/python/ops/tensor_array_ops.py +++ b/tensorflow/python/ops/tensor_array_ops.py @@ -653,7 +653,7 @@ class _EagerTensorArray(object): if len(tensors) > len(self._tensor_array) and not self._dynamic_size: raise ValueError( "Cannot unstack %d tensors into a TensorArray of static size %d" % - (len(tensors), len(self._tensors))) + (len(tensors), len(self._tensor_array))) ta = self._identity_without_array() ta._implementation._tensor_array = tensors # pylint: disable=protected-access return ta -- GitLab From f0bcd79ba07d7b759f5da2bf39fd5acaee148ba5 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Tue, 13 Feb 2018 10:16:03 -0800 Subject: [PATCH 118/629] Move two common utility functions used by training and training_eager classes to a utility class. PiperOrigin-RevId: 185548922 --- .../keras/_impl/keras/engine/training.py | 84 ++++--------------- .../_impl/keras/engine/training_eager.py | 82 +++--------------- .../_impl/keras/engine/training_eager_test.py | 27 +++--- .../keras/_impl/keras/engine/training_test.py | 27 +++--- .../keras/_impl/keras/utils/generic_utils.py | 62 ++++++++++++++ 5 files changed, 116 insertions(+), 166 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index c4b0414836..a71f371b8e 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -35,7 +35,9 @@ from tensorflow.python.keras._impl.keras.engine.topology import Network from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence +from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar +from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.layers.base import _DeferredTensor from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import optimizer as tf_optimizer_module @@ -362,62 +364,6 @@ def _batch_shuffle(index_array, batch_size): return np.append(index_array, last_batch) -def _make_batches(size, batch_size): - """Returns a list of batch indices (tuples of indices). - - Arguments: - size: Integer, total size of the data to slice into batches. - batch_size: Integer, batch size. - - Returns: - A list of tuples of array indices. - """ - num_batches = (size + batch_size - 1) // batch_size # round up - return [(i * batch_size, min(size, (i + 1) * batch_size)) - for i in range(num_batches)] - - -def _slice_arrays(arrays, start=None, stop=None): - """Slice an array or list of arrays. - - This takes an array-like, or a list of - array-likes, and outputs: - - arrays[start:stop] if `arrays` is an array-like - - [x[start:stop] for x in arrays] if `arrays` is a list - - Can also work on list/array of indices: `_slice_arrays(x, indices)` - - Arguments: - arrays: Single array or list of arrays. - start: can be an integer index (start index) - or a list/array of indices - stop: integer (stop index); should be None if - `start` was a list. - - Returns: - A slice of the array(s). - """ - if arrays is None: - return [None] - elif isinstance(arrays, list): - if hasattr(start, '__len__'): - # hdf5 datasets only support list objects as indices - if hasattr(start, 'shape'): - start = start.tolist() - return [None if x is None else x[start] for x in arrays] - else: - return [None if x is None else x[start:stop] for x in arrays] - else: - if hasattr(start, '__len__'): - if hasattr(start, 'shape'): - start = start.tolist() - return arrays[start] - elif hasattr(start, '__getitem__'): - return arrays[start:stop] - else: - return [None] - - def _weighted_masked_objective(fn): """Adds support for masking and sample-weighting to an objective function. @@ -1277,16 +1223,16 @@ class Model(Network): elif shuffle: np.random.shuffle(index_array) - batches = _make_batches(num_train_samples, batch_size) + batches = make_batches(num_train_samples, batch_size) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] try: if isinstance(ins[-1], float): # Do not slice the training phase flag. - ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: - ins_batch = _slice_arrays(ins, batch_ids) + ins_batch = slice_arrays(ins, batch_ids) except TypeError: raise TypeError('TypeError while preparing batch. ' 'If using HDF5 input data, ' @@ -1381,15 +1327,15 @@ class Model(Network): else: # Sample-based predictions. outs = [] - batches = _make_batches(num_samples, batch_size) + batches = make_batches(num_samples, batch_size) index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] if ins and isinstance(ins[-1], float): # Do not slice the training phase flag. - ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: - ins_batch = _slice_arrays(ins, batch_ids) + ins_batch = slice_arrays(ins, batch_ids) for i in indices_for_conversion_to_dense: ins_batch[i] = ins_batch[i].toarray() @@ -1460,15 +1406,15 @@ class Model(Network): for i in range(len(outs)): outs[i] /= steps else: - batches = _make_batches(num_samples, batch_size) + batches = make_batches(num_samples, batch_size) index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] if isinstance(ins[-1], float): # Do not slice the training phase flag. - ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: - ins_batch = _slice_arrays(ins, batch_ids) + ins_batch = slice_arrays(ins, batch_ids) for i in indices_for_conversion_to_dense: ins_batch[i] = ins_batch[i].toarray() @@ -2025,10 +1971,10 @@ class Model(Network): split_at = int(x[0].shape[0] * (1. - validation_split)) else: split_at = int(len(x[0]) * (1. - validation_split)) - x, val_x = (_slice_arrays(x, 0, split_at), _slice_arrays(x, split_at)) - y, val_y = (_slice_arrays(y, 0, split_at), _slice_arrays(y, split_at)) - sample_weights, val_sample_weights = (_slice_arrays( - sample_weights, 0, split_at), _slice_arrays(sample_weights, split_at)) + x, val_x = (slice_arrays(x, 0, split_at), slice_arrays(x, split_at)) + y, val_y = (slice_arrays(y, 0, split_at), slice_arrays(y, split_at)) + sample_weights, val_sample_weights = (slice_arrays( + sample_weights, 0, split_at), slice_arrays(sample_weights, split_at)) if self.uses_learning_phase and not isinstance(K.learning_phase(), int): val_ins = val_x + val_y + val_sample_weights + [0.] else: diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py index 3774596af0..477bb2fe7a 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py @@ -26,69 +26,9 @@ from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras import callbacks as cbks from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module +from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar - - -def _make_batches(size, batch_size): - """Returns a list of batch indices (tuples of indices). - - Arguments: - size: Integer, total size of the data to slice into batches. - batch_size: Integer, batch size. - - Returns: - A list of tuples of array indices. - """ - num_batches = int(np.ceil(size / float(batch_size))) - return [(i * batch_size, min(size, (i + 1) * batch_size)) - for i in range(0, num_batches)] - - -def _slice_arrays(arrays, start=None, stop=None): - """Slice an array or list of arrays. - - This takes an array-like, or a list of - array-likes, and outputs: - - arrays[start:stop] if `arrays` is an array-like - - [x[start:stop] for x in arrays] if `arrays` is a list - - Can also work on list/array of indices: `_slice_arrays(x, indices)` - - Arguments: - arrays: Single array or list of arrays. - start: can be an integer index (start index) - or a list/array of indices - stop: integer (stop index); should be None if - `start` was a list. - - Returns: - A slice of the array(s). - - Raises: - ValueError: If the value of start is a list and stop is not None. - """ - if arrays is None: - return [None] - if isinstance(start, list) and stop is not None: - raise ValueError('The stop argument has to be None if the value of start is' - 'a list.') - elif isinstance(arrays, list): - if hasattr(start, '__len__'): - # hdf5 datasets only support list objects as indices - if hasattr(start, 'shape'): - start = start.tolist() - return [None if x is None else x[start] for x in arrays] - else: - return [None if x is None else x[start:stop] for x in arrays] - else: - if hasattr(start, '__len__'): - if hasattr(start, 'shape'): - start = start.tolist() - return arrays[start] - elif hasattr(start, '__getitem__'): - return arrays[start:stop] - else: - return [None] +from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays def _get_metrics_info(metric, internal_output_shapes=None, loss_func=None): @@ -442,16 +382,16 @@ def fit_loop( elif shuffle: np.random.shuffle(index_array) - batches = _make_batches(num_train_samples, batch_size) + batches = make_batches(num_train_samples, batch_size) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] try: if isinstance(ins[-1], float): # Do not slice the training phase flag. - ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: - ins_batch = _slice_arrays(ins, batch_ids) + ins_batch = slice_arrays(ins, batch_ids) except TypeError: raise TypeError('TypeError while preparing batch. ' 'If using HDF5 input data, ' @@ -554,15 +494,15 @@ def test_loop(model, ins, batch_size=None, verbose=0, steps=None): outs = [] if verbose == 1: progbar = Progbar(target=num_samples) - batches = _make_batches(num_samples, batch_size) + batches = make_batches(num_samples, batch_size) index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] if isinstance(ins[-1], float): # Do not slice the training phase flag. - ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: - ins_batch = _slice_arrays(ins, batch_ids) + ins_batch = slice_arrays(ins, batch_ids) ins_batch_converted = [] for ib in ins_batch: @@ -631,15 +571,15 @@ def predict_loop(model, ins, batch_size=32, verbose=0, steps=None): progbar = Progbar(target=num_samples) outs = [] - batches = _make_batches(num_samples, batch_size) + batches = make_batches(num_samples, batch_size) index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] if ins and isinstance(ins[-1], float): # Do not slice the training phase flag. - ins_batch = _slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: - ins_batch = _slice_arrays(ins, batch_ids) + ins_batch = slice_arrays(ins, batch_ids) ins_batch_converted = [] for ib in ins_batch: diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py index 81e2f7a514..45601f964a 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py @@ -24,6 +24,7 @@ import numpy as np from tensorflow.python.framework import ops from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils +from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.platform import test from tensorflow.python.training.rmsprop import RMSPropOptimizer @@ -702,22 +703,22 @@ class TestTrainingUtils(test.TestCase): def test_slice_arrays(self): input_a = np.random.random((10, 3)) - keras.engine.training._slice_arrays(None) - keras.engine.training._slice_arrays(input_a, 0) - keras.engine.training._slice_arrays(input_a, 0, 1) - keras.engine.training._slice_arrays(input_a, stop=2) + slice_arrays(None) + slice_arrays(input_a, 0) + slice_arrays(input_a, 0, 1) + slice_arrays(input_a, stop=2) input_a = [None, [1, 1], None, [1, 1]] - keras.engine.training._slice_arrays(input_a, 0) - keras.engine.training._slice_arrays(input_a, 0, 1) - keras.engine.training._slice_arrays(input_a, stop=2) + slice_arrays(input_a, 0) + slice_arrays(input_a, 0, 1) + slice_arrays(input_a, stop=2) input_a = [None] - keras.engine.training._slice_arrays(input_a, 0) - keras.engine.training._slice_arrays(input_a, 0, 1) - keras.engine.training._slice_arrays(input_a, stop=2) + slice_arrays(input_a, 0) + slice_arrays(input_a, 0, 1) + slice_arrays(input_a, stop=2) input_a = None - keras.engine.training._slice_arrays(input_a, 0) - keras.engine.training._slice_arrays(input_a, 0, 1) - keras.engine.training._slice_arrays(input_a, stop=2) + slice_arrays(input_a, 0) + slice_arrays(input_a, 0, 1) + slice_arrays(input_a, stop=2) def test_fit_with_BatchNorm(self): model = keras.models.Sequential() diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index b380238e4e..9651eb9f14 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -26,6 +26,7 @@ import numpy as np from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils from tensorflow.python.keras._impl.keras.engine.training import _weighted_masked_objective +from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.platform import test try: @@ -1057,22 +1058,22 @@ class TestTrainingUtils(test.TestCase): def test_slice_arrays(self): input_a = np.random.random((10, 3)) - keras.engine.training._slice_arrays(None) - keras.engine.training._slice_arrays(input_a, 0) - keras.engine.training._slice_arrays(input_a, 0, 1) - keras.engine.training._slice_arrays(input_a, stop=2) + slice_arrays(input_a, 0) + slice_arrays(None) + slice_arrays(input_a, 0, 1) + slice_arrays(input_a, stop=2) input_a = [None, [1, 1], None, [1, 1]] - keras.engine.training._slice_arrays(input_a, 0) - keras.engine.training._slice_arrays(input_a, 0, 1) - keras.engine.training._slice_arrays(input_a, stop=2) + slice_arrays(input_a, 0) + slice_arrays(input_a, 0, 1) + slice_arrays(input_a, stop=2) input_a = [None] - keras.engine.training._slice_arrays(input_a, 0) - keras.engine.training._slice_arrays(input_a, 0, 1) - keras.engine.training._slice_arrays(input_a, stop=2) + slice_arrays(input_a, 0) + slice_arrays(input_a, 0, 1) + slice_arrays(input_a, stop=2) input_a = None - keras.engine.training._slice_arrays(input_a, 0) - keras.engine.training._slice_arrays(input_a, 0, 1) - keras.engine.training._slice_arrays(input_a, stop=2) + slice_arrays(input_a, 0) + slice_arrays(input_a, 0, 1) + slice_arrays(input_a, stop=2) class TestTrainingWithDataTensors(test.TestCase): diff --git a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py index adbe6c3288..fa3b55c59e 100644 --- a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py @@ -433,3 +433,65 @@ class Progbar(object): def add(self, n, values=None): self.update(self.seen_so_far + n, values) + + +def make_batches(size, batch_size): + """Returns a list of batch indices (tuples of indices). + + Arguments: + size: Integer, total size of the data to slice into batches. + batch_size: Integer, batch size. + + Returns: + A list of tuples of array indices. + """ + num_batches = int(np.ceil(size / float(batch_size))) + return [(i * batch_size, min(size, (i + 1) * batch_size)) + for i in range(0, num_batches)] + + +def slice_arrays(arrays, start=None, stop=None): + """Slice an array or list of arrays. + + This takes an array-like, or a list of + array-likes, and outputs: + - arrays[start:stop] if `arrays` is an array-like + - [x[start:stop] for x in arrays] if `arrays` is a list + + Can also work on list/array of indices: `slice_arrays(x, indices)` + + Arguments: + arrays: Single array or list of arrays. + start: can be an integer index (start index) + or a list/array of indices + stop: integer (stop index); should be None if + `start` was a list. + + Returns: + A slice of the array(s). + + Raises: + ValueError: If the value of start is a list and stop is not None. + """ + if arrays is None: + return [None] + if isinstance(start, list) and stop is not None: + raise ValueError('The stop argument has to be None if the value of start is' + 'a list.') + elif isinstance(arrays, list): + if hasattr(start, '__len__'): + # hdf5 datasets only support list objects as indices + if hasattr(start, 'shape'): + start = start.tolist() + return [None if x is None else x[start] for x in arrays] + else: + return [None if x is None else x[start:stop] for x in arrays] + else: + if hasattr(start, '__len__'): + if hasattr(start, 'shape'): + start = start.tolist() + return arrays[start] + elif hasattr(start, '__getitem__'): + return arrays[start:stop] + else: + return [None] -- GitLab From 2f93ce3438af1f068eed8c9b01eb508bda3dcdcc Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 13 Feb 2018 10:23:53 -0800 Subject: [PATCH 119/629] Use _set_attr instead of directly modifying the nodedef PiperOrigin-RevId: 185550223 --- tensorflow/contrib/lite/python/BUILD | 1 + tensorflow/contrib/lite/python/op_hint.py | 31 +++++++++++++++++------ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/lite/python/BUILD b/tensorflow/contrib/lite/python/BUILD index 2d8c49b7d7..82feae0f00 100644 --- a/tensorflow/contrib/lite/python/BUILD +++ b/tensorflow/contrib/lite/python/BUILD @@ -28,6 +28,7 @@ py_library( visibility = ["//visibility:public"], deps = [ "//tensorflow/contrib/framework:framework_py", + "//tensorflow/core:protos_all_py", "//tensorflow/python:platform", ], ) diff --git a/tensorflow/contrib/lite/python/op_hint.py b/tensorflow/contrib/lite/python/op_hint.py index 7c587e38b1..9a3971228a 100644 --- a/tensorflow/contrib/lite/python/op_hint.py +++ b/tensorflow/contrib/lite/python/op_hint.py @@ -73,6 +73,7 @@ import itertools as _itertools import uuid as _uuid from tensorflow.contrib import framework as _framework +from tensorflow.core.framework import attr_value_pb2 as _attr_value_pb2 from tensorflow.python.framework import ops as _ops from tensorflow.python.ops import array_ops as _array_ops from tensorflow.python.util.all_util import remove_undocumented @@ -133,10 +134,17 @@ class OpHint(object): def augmented_identity(arg): identity_op = _array_ops.identity(arg) - attr = identity_op.op.node_def.attr - attr[OpHint.FUNCTION_NAME_ATTR].s = self._function_name - attr[OpHint.FUNCTION_UUID_ATTR].s = self._unique_function_id - attr[OpHint.FUNCTION_INPUT_INDEX_ATTR].i = self._curr_input_index + # pylint: disable=protected-access + identity_op.op._set_attr( + OpHint.FUNCTION_NAME_ATTR, + _attr_value_pb2.AttrValue(s=self._function_name)) + identity_op.op._set_attr( + OpHint.FUNCTION_UUID_ATTR, + _attr_value_pb2.AttrValue(s=self._unique_function_id)) + identity_op.op._set_attr( + OpHint.FUNCTION_INPUT_INDEX_ATTR, + _attr_value_pb2.AttrValue(i=self._curr_input_index)) + # pylint: enable=protected-access self._curr_input_index += 1 return identity_op @@ -154,10 +162,17 @@ class OpHint(object): def augmented_identity(arg): identity_op = _array_ops.identity(arg) - attr = identity_op.op.node_def.attr - attr[OpHint.FUNCTION_NAME_ATTR].s = self._function_name - attr[OpHint.FUNCTION_UUID_ATTR].s = self._unique_function_id - attr[OpHint.FUNCTION_OUTPUT_INDEX_ATTR].i = self._curr_output_index + # pylint: disable=protected-access + identity_op.op._set_attr( + OpHint.FUNCTION_NAME_ATTR, + _attr_value_pb2.AttrValue(s=self._function_name)) + identity_op.op._set_attr( + OpHint.FUNCTION_UUID_ATTR, + _attr_value_pb2.AttrValue(s=self._unique_function_id)) + identity_op.op._set_attr( + OpHint.FUNCTION_OUTPUT_INDEX_ATTR, + _attr_value_pb2.AttrValue(i=self._curr_output_index)) + # pylint: enable=protected-access self._curr_output_index += 1 return identity_op -- GitLab From cd3a6effe4f7bbaa3857bfe6432a361a7676507f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 10:36:37 -0800 Subject: [PATCH 120/629] Fix documentation for the real shape of the output of crf_log_likelihood. PiperOrigin-RevId: 185552171 --- tensorflow/contrib/crf/python/ops/crf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index 62708636c6..faa78769b9 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -166,8 +166,8 @@ def crf_log_likelihood(inputs, sequence_lengths: A [batch_size] vector of true sequence lengths. transition_params: A [num_tags, num_tags] transition matrix, if available. Returns: - log_likelihood: A scalar containing the log-likelihood of the given sequence - of tag indices. + log_likelihood: A [batch_size] `Tensor` containing the log-likelihood of + each example, given the sequence of tag indices. transition_params: A [num_tags, num_tags] transition matrix. This is either provided by the caller or created in this function. """ @@ -182,7 +182,7 @@ def crf_log_likelihood(inputs, transition_params) log_norm = crf_log_norm(inputs, sequence_lengths, transition_params) - # Normalize the scores to get the log-likelihood. + # Normalize the scores to get the log-likelihood per example. log_likelihood = sequence_scores - log_norm return log_likelihood, transition_params -- GitLab From 6331c00951b8d02bcc143f08b15be08b3f078d73 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 10:48:43 -0800 Subject: [PATCH 121/629] Clarify that the behavior of the iterator (advancing whenever any of the components is evaluated) is not magic, but a simple consequence of the dataflow graph structure. PiperOrigin-RevId: 185554089 --- tensorflow/docs_src/programmers_guide/datasets.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index 9ede4ab83c..d19200e80c 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -322,9 +322,10 @@ sess.run(iterator.initializer) next1, (next2, next3) = iterator.get_next() ``` -Note that evaluating *any* of `next1`, `next2`, or `next3` will advance the -iterator for all components. A typical consumer of an iterator will include all -components in a single expression. +Note that `next1`, `next2`, and `next3` are tensors produced by the +same op/node (created by `Iterator.get_next()`). Therefore, evaluating *any* of +these tensors will advance the iterator for all components. A typical consumer +of an iterator will include all components in a single expression. ## Reading input data -- GitLab From 3cbb41abe603aafcd1abcdd9ed6284e020177b08 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 10:48:58 -0800 Subject: [PATCH 122/629] Add empty scaffolding for loop optimizers in Grappler. PiperOrigin-RevId: 185554126 --- tensorflow/core/grappler/optimizers/BUILD | 37 +++++++++++ .../grappler/optimizers/loop_optimizer.cc | 46 ++++++++++++++ .../core/grappler/optimizers/loop_optimizer.h | 49 +++++++++++++++ .../optimizers/loop_optimizer_test.cc | 62 +++++++++++++++++++ .../grappler/optimizers/meta_optimizer.cc | 13 +++- .../core/protobuf/rewriter_config.proto | 2 + 6 files changed, 207 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/loop_optimizer.cc create mode 100644 tensorflow/core/grappler/optimizers/loop_optimizer.h create mode 100644 tensorflow/core/grappler/optimizers/loop_optimizer_test.cc diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 3432de9dcd..e839630605 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -371,6 +371,7 @@ cc_library( ":dependency_optimizer", ":graph_optimizer", ":layout_optimizer", + ":loop_optimizer", ":memory_optimizer", ":model_pruner", "//tensorflow/core:framework", @@ -380,3 +381,39 @@ cc_library( "//tensorflow/core/grappler/utils:topological_sort", ], ) + +cc_library( + name = "loop_optimizer", + srcs = ["loop_optimizer.cc"], + hdrs = [ + "loop_optimizer.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":graph_optimizer", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/costs:graph_properties", + ], +) + +tf_cc_test( + name = "loop_optimizer_test", + size = "small", + srcs = ["loop_optimizer_test.cc"], + deps = [ + ":loop_optimizer", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", + ], +) diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc new file mode 100644 index 0000000000..102526e22f --- /dev/null +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -0,0 +1,46 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/loop_optimizer.h" + +#include +#include + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/grappler/costs/graph_properties.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/strings/strcat.h" + +namespace tensorflow { +namespace grappler { + +Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { + *optimized_graph = item.graph; + + return Status::OK(); +} + +void LoopOptimizer::Feedback(Cluster* /*cluster*/, const GrapplerItem& /*item*/, + const GraphDef& /*optimized_graph*/, + double /*result*/) { + // Nothing to do for LoopOptimizer. +} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.h b/tensorflow/core/grappler/optimizers/loop_optimizer.h new file mode 100644 index 0000000000..106d4628ae --- /dev/null +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.h @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_LOOP_OPTIMIZER_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_LOOP_OPTIMIZER_H_ + +#include +#include "tensorflow/core/grappler/optimizers/graph_optimizer.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/protobuf/rewriter_config.pb.h" + +namespace tensorflow { +namespace grappler { + +class LoopOptimizer : public GraphOptimizer { + public: + LoopOptimizer() : opt_level_(RewriterConfig::ON) {} + explicit LoopOptimizer(RewriterConfig::Toggle opt_level) + : opt_level_(opt_level) {} + ~LoopOptimizer() override {} + + string name() const override { return "loop_optimizer"; }; + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimized_graph, double result) override; + + private: + RewriterConfig::Toggle opt_level_; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_LOOP_OPTIMIZER_H_ diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc new file mode 100644 index 0000000000..c09434f609 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc @@ -0,0 +1,62 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/loop_optimizer.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class LoopOptimizerTest : public ::testing::Test {}; + +void VerifyGraphsEqual(const GraphDef& original_graph, + const GraphDef& optimized_graph, const string& func) { + EXPECT_EQ(original_graph.node_size(), optimized_graph.node_size()) << func; + for (int i = 0; i < original_graph.node_size(); ++i) { + const NodeDef& original = original_graph.node(i); + const NodeDef& optimized = optimized_graph.node(i); + EXPECT_EQ(original.name(), optimized.name()) << func; + EXPECT_EQ(original.op(), optimized.op()) << func; + EXPECT_EQ(original.input_size(), optimized.input_size()) << func; + for (int j = 0; j < original.input_size(); ++j) { + EXPECT_EQ(original.input(j), optimized.input(j)) << func; + } + } +} + +TEST_F(LoopOptimizerTest, NoOp) { + // This trivial graph is so basic there's nothing to optimize. + TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); + GrapplerItem item; + CHECK(fake_input.NextItem(&item)); + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + VerifyGraphsEqual(item.graph, output, __FUNCTION__); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index ab7e05dd5d..e27b9df620 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/optimizers/layout_optimizer.h" +#include "tensorflow/core/grappler/optimizers/loop_optimizer.h" #include "tensorflow/core/grappler/optimizers/memory_optimizer.h" #include "tensorflow/core/grappler/optimizers/model_pruner.h" #include "tensorflow/core/grappler/utils/topological_sort.h" @@ -75,6 +76,9 @@ std::unique_ptr MetaOptimizer::NewOptimizer( graph_optimizer.reset( new DependencyOptimizer(cfg_.dependency_optimization())); } + if (optimizer == "loop") { + graph_optimizer.reset(new LoopOptimizer(cfg_.loop_optimization())); + } return graph_optimizer; } @@ -97,6 +101,10 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new DependencyOptimizer(cfg_.dependency_optimization()))); } + if (cfg_.loop_optimization() != RewriterConfig::OFF) { + optimizers.push_back(std::unique_ptr( + new LoopOptimizer(cfg_.loop_optimization()))); + } if (cfg_.layout_optimizer() != RewriterConfig::OFF) { optimizers.push_back( std::unique_ptr(new LayoutOptimizer())); @@ -119,8 +127,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, } } else { std::set available_optimizers = { - "pruning", "constfold", "layout", "memory", - "autoparallel", "arithmetic", "dependency"}; + "pruning", "constfold", "layout", "memory", + "autoparallel", "arithmetic", "dependency", "loop"}; for (const auto& optimizer : cfg_.optimizers()) { if (available_optimizers.find(optimizer) != available_optimizers.end()) { optimizers.push_back(NewOptimizer(optimizer)); @@ -204,6 +212,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { cfg.layout_optimizer() != RewriterConfig::OFF || cfg.constant_folding() != RewriterConfig::OFF || cfg.dependency_optimization() != RewriterConfig::OFF || + cfg.loop_optimization() == RewriterConfig::ON || cfg.arithmetic_optimization() != RewriterConfig::OFF || cfg.auto_parallel().enable() || cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT || diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 77667e4358..0e9e202bc9 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -37,6 +37,8 @@ message RewriterConfig { Toggle arithmetic_optimization = 7; // Control dependency optimizations (default is ON). Toggle dependency_optimization = 8; + // Loop optimizations (default is OFF). + Toggle loop_optimization = 9; // If true, don't remove unnecessary ops from the graph bool disable_model_pruning = 2; -- GitLab From c94a85e4bd0ded9b259e92bc10de236180f3206f Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 13 Feb 2018 10:54:09 -0800 Subject: [PATCH 123/629] add missing blank line PiperOrigin-RevId: 185554969 --- tensorflow/docs_src/get_started/get_started_for_beginners.md | 4 ++++ tensorflow/docs_src/get_started/premade_estimators.md | 1 + 2 files changed, 5 insertions(+) diff --git a/tensorflow/docs_src/get_started/get_started_for_beginners.md b/tensorflow/docs_src/get_started/get_started_for_beginners.md index fb235735bc..069f5c13dd 100644 --- a/tensorflow/docs_src/get_started/get_started_for_beginners.md +++ b/tensorflow/docs_src/get_started/get_started_for_beginners.md @@ -36,6 +36,7 @@ the following three: alt="Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor" src="../images/iris_three_species.jpg"> + **From left to right, [*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by [Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0), @@ -188,6 +189,7 @@ provides a programming stack consisting of multiple API layers:
    + **The TensorFlow Programming Environment.**

     

    @@ -380,6 +382,7 @@ fully connected neural network consisting of three hidden layers:
    + **A neural network with three hidden layers.**

     

    @@ -568,6 +571,7 @@ of 0.5. The following suggests a more effective model:
  • Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
    tensorflow-1.6.0rc0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.6.0rc0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
    tensorflow-1.6.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.6.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
    tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    tensorflow_gpu-1.5.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
    tensorflow-1.4.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
    5.5 2.5 4.0 1.3 1 1
    + **A model that is 80% accurate.**

     

    diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/get_started/premade_estimators.md index 4f01f997c3..6bffd2e065 100644 --- a/tensorflow/docs_src/get_started/premade_estimators.md +++ b/tensorflow/docs_src/get_started/premade_estimators.md @@ -98,6 +98,7 @@ classifies Iris flowers into three different species based on the size of their alt="Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor" src="../images/iris_three_species.jpg"> + **From left to right, [*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by [Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0), -- GitLab From f83693a2aa9cf6c90d0cc214eab7524817390c54 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Tue, 13 Feb 2018 11:18:15 -0800 Subject: [PATCH 124/629] Allow other types of variables to act as a resource variable. Introduce resource_variable_ops.is_resource_variable() function that returns true if an _should_act_as_resource_variable attribute is set. PiperOrigin-RevId: 185559202 --- tensorflow/python/ops/gradients_impl.py | 2 +- tensorflow/python/ops/resource_variable_ops.py | 6 ++++++ tensorflow/python/training/slot_creator.py | 7 +------ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 9f06c0ee1f..1418c0b10f 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -494,7 +494,7 @@ def gradients(ys, list(ys) + list(xs) + list(stop_gradients) + list(grad_ys)) as grad_scope: ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y") xs = [ - x.handle if isinstance(x, resource_variable_ops.ResourceVariable) else x + x.handle if resource_variable_ops.is_resource_variable(x) else x for x in xs ] xs = ops.internal_convert_n_to_tensor_or_indexed_slices( diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 75cb57f16f..11f452fa46 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -957,3 +957,9 @@ ops.register_proto_function( proto_type=variable_pb2.VariableDef, to_proto=_to_proto_fn, from_proto=_from_proto_fn) + + +def is_resource_variable(var): + """"Returns True if `var` is to be considered a ResourceVariable.""" + return isinstance(var, ResourceVariable) or hasattr( + var, "_should_act_as_resource_variable") diff --git a/tensorflow/python/training/slot_creator.py b/tensorflow/python/training/slot_creator.py index 18a5b89d30..75ef3d5976 100644 --- a/tensorflow/python/training/slot_creator.py +++ b/tensorflow/python/training/slot_creator.py @@ -48,11 +48,6 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables -def _is_resource(v): - """Returns true if v is something you get from a resource variable.""" - return isinstance(v, resource_variable_ops.ResourceVariable) - - def _create_slot_var(primary, val, scope, validate_shape, shape, dtype): """Helper function for creating a slot variable.""" @@ -65,7 +60,7 @@ def _create_slot_var(primary, val, scope, validate_shape, shape, dtype): shape = shape if callable(val) else None slot = variable_scope.get_variable( scope, initializer=val, trainable=False, - use_resource=_is_resource(primary), + use_resource=resource_variable_ops.is_resource_variable(primary), shape=shape, dtype=dtype, validate_shape=validate_shape) variable_scope.get_variable_scope().set_partitioner(current_partitioner) -- GitLab From c65a96dd61c38a26da71c1cae7bc31dd790511cd Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Tue, 13 Feb 2018 11:33:12 -0800 Subject: [PATCH 125/629] Error out or log a warning if user sets the TPUConfig.num_shards incorrectly. Also improve TPU system metadata print out message. PiperOrigin-RevId: 185561680 --- .../contrib/tpu/python/tpu/tpu_context.py | 23 +++++++++++++++++++ .../tpu/python/tpu/tpu_system_metadata.py | 11 +++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py index d735618260..344ff9a37f 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py @@ -411,6 +411,29 @@ class _TPUContext(object): 'Tensorflow master address and TPU worker(s). Available devices ' 'are {}.'.format(tpu_system_metadata.devices)) + if self._config.tpu_config.num_shards: + user_provided_num_replicas = self._config.tpu_config.num_shards + if user_provided_num_replicas != num_replicas: + message = ( + 'TPUConfig.num_shards is not set correctly. According to TPU ' + 'system metadata for Tensorflow master ({}): num_replicas should ' + 'be ({}), got ({}). For non-model-parallelism, num_replicas should ' + 'be the total num of TPU cores in the system. For ' + 'model-parallelism, the total number of TPU cores should be ' + 'product(computation_shape) * num_replicas. Please set it ' + 'accordingly or leave it as `None`'.format( + self._get_master_address(), num_replicas, + user_provided_num_replicas)) + + if self.model_parallelism_enabled: + raise ValueError(message) + else: + logging.warning(message) + logging.warning( + 'For non-model-parallelism, TPUEstimator currently ' + 'automatically queries the TPU system information so ignores ' + 'this field.') + if mode == model_fn_lib.ModeKeys.TRAIN: if self._train_batch_size % num_replicas != 0: raise ValueError( diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py index ea1a82959c..493d1848c0 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_system_metadata.py @@ -114,8 +114,15 @@ def _query_tpu_system_metadata(master_address, run_config, topology=topology, devices=devices) - msg = 'Found TPU system %s' if tpu_core_count else 'Failed to find TPU: %s' - logging.info(msg, metadata) + if tpu_core_count: + logging.info('Found TPU system:') + logging.info('*** Num TPU Cores: %d', metadata.num_cores) + logging.info('*** Num TPU Workers: %d', metadata.num_hosts) + logging.info('*** Num TPU Cores Per Worker: %d', + metadata.num_of_cores_per_host) + logging.info('*** Available Devices: %s', metadata.devices) + else: + logging.info('Failed to find TPU: %s', metadata) return metadata -- GitLab From 20dc0dec68d539563729c7533aece9d1f0f8e128 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 13 Feb 2018 11:46:08 -0800 Subject: [PATCH 126/629] Explicitely place the swap-in node: this ensures that subsequent rounds of memory optimization have a more accurate picture of the placement. PiperOrigin-RevId: 185563797 --- tensorflow/core/grappler/optimizers/memory_optimizer.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index 777cc3a79b..3057ee5fa1 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -730,6 +730,7 @@ Status BuildSwapPair(NodeDef* node, int input_to_swap, *swap_in_node->add_input() = swap_out_node->name(); // Colocate the swap_in_ node with the node itself. + swap_in_node->set_device(node->device()); string coloc_group = strings::StrCat("loc@", tensor_to_swap); (*swap_in_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); (*node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); -- GitLab From 142351b998a6471f26b5c9ba74d09b107cd96c68 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 13 Feb 2018 11:56:59 -0800 Subject: [PATCH 127/629] Minor eager-related performance improvements - Add a cache for name_scope - skip some overhead _MulGrad and _MatMulGrad PiperOrigin-RevId: 185565363 --- tensorflow/python/framework/ops.py | 10 +++++++++- tensorflow/python/ops/math_grad.py | 26 +++++++++++++++----------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 77e83554c9..398b3f67e2 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5537,6 +5537,9 @@ def get_all_collection_keys(): return get_default_graph().get_all_collection_keys() +name_scope_cache = {} + + # Named like a function for backwards compatibility with the # @tf_contextlib.contextmanager version, which was switched to a class to avoid # some object creation overhead. @@ -5596,7 +5599,11 @@ class name_scope(object): # pylint: disable=invalid-name if not self._name: scope_name = "" else: - if self._name[-1] == "/": + cache_key = self._name, self._old_name, self._default_name + if cache_key in name_scope_cache: + self._ctx.scope_name = name_scope_cache[cache_key] + return self._ctx.scope_name + elif self._name[-1] == "/": # A trailing slash breaks out of nested name scopes, indicating a # fully specified scope name, for compatibility with Graph.name_scope. scope_name = self._name @@ -5605,6 +5612,7 @@ class name_scope(object): # pylint: disable=invalid-name scope_name = ( self._old_name + name_with_trailing_slash if self._old_name else name_with_trailing_slash) + name_scope_cache[cache_key] = scope_name self._ctx.scope_name = scope_name return scope_name else: diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index 53308484c4..9d5289f23d 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -791,11 +791,13 @@ def _MulGrad(op, grad): sx = array_ops.shape(x) sy = array_ops.shape(y) rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy) - # pylint: enable=protected-access x = math_ops.conj(x) y = math_ops.conj(y) - return (array_ops.reshape(math_ops.reduce_sum(grad * y, rx), sx), - array_ops.reshape(math_ops.reduce_sum(x * grad, ry), sy)) + return (array_ops.reshape( + math_ops.reduce_sum(gen_math_ops._mul(grad, y), rx), sx), + array_ops.reshape( + math_ops.reduce_sum(gen_math_ops._mul(x, grad), ry), sy)) + # pylint: enable=protected-access @ops.RegisterGradient("Div") @@ -968,18 +970,20 @@ def _MatMulGrad(op, grad): t_b = op.get_attr("transpose_b") a = math_ops.conj(op.inputs[0]) b = math_ops.conj(op.inputs[1]) + # pylint: disable=protected-access if not t_a and not t_b: - grad_a = math_ops.matmul(grad, b, transpose_b=True) - grad_b = math_ops.matmul(a, grad, transpose_a=True) + grad_a = gen_math_ops._mat_mul(grad, b, transpose_b=True) + grad_b = gen_math_ops._mat_mul(a, grad, transpose_a=True) elif not t_a and t_b: - grad_a = math_ops.matmul(grad, b) - grad_b = math_ops.matmul(grad, a, transpose_a=True) + grad_a = gen_math_ops._mat_mul(grad, b) + grad_b = gen_math_ops._mat_mul(grad, a, transpose_a=True) elif t_a and not t_b: - grad_a = math_ops.matmul(b, grad, transpose_b=True) - grad_b = math_ops.matmul(a, grad) + grad_a = gen_math_ops._mat_mul(b, grad, transpose_b=True) + grad_b = gen_math_ops._mat_mul(a, grad) elif t_a and t_b: - grad_a = math_ops.matmul(b, grad, transpose_a=True, transpose_b=True) - grad_b = math_ops.matmul(grad, a, transpose_a=True, transpose_b=True) + grad_a = gen_math_ops._mat_mul(b, grad, transpose_a=True, transpose_b=True) + grad_b = gen_math_ops._mat_mul(grad, a, transpose_a=True, transpose_b=True) + # pylint: enable=protected-access return grad_a, grad_b -- GitLab From ca07b694fc5307b9eec6e3c449382d823b78a162 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 13 Feb 2018 12:10:40 -0800 Subject: [PATCH 128/629] Add cache for _zeros in backprop PiperOrigin-RevId: 185567508 --- tensorflow/python/eager/backprop.py | 35 ++++++++++++++++++++---- tensorflow/python/framework/test_util.py | 3 +- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index d76fecf126..d8e13d7231 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import functools import operator import threading @@ -42,6 +43,26 @@ from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect +class _TensorCache(object): + """Simple cache which evicts items based on length in a FIFO manner.""" + + def __init__(self, max_items=256): + self._data = collections.OrderedDict() + self._max_items = max_items if max_items else 256 + + def put(self, key, value): + self._data[key] = value + + if len(self._data) > self._max_items: + self._data.popitem(last=False) + + def get(self, key): + return self._data.get(key, None) + + def flush(self): + self._data = {} + + _op_attr_type_cache = {} @@ -734,8 +755,7 @@ def _num_elements(grad): raise ValueError("`grad` not a Tensor or IndexedSlices.") -_last_zero_shape_dtype = [None, None] -_last_zero = [None] +_zeros_cache = _TensorCache() def _fast_fill(value, shape, dtype): @@ -744,14 +764,17 @@ def _fast_fill(value, shape, dtype): def _zeros(shape, dtype): """Wraps array_ops.zeros to cache last zero for a given shape and dtype.""" + device = context.context().device_name if dtype == dtypes.variant: # TODO(apassos): need to save enough information about variant tensors to do # a zeros return None - if [shape, dtype] != _last_zero_shape_dtype: - _last_zero_shape_dtype[:] = [shape, dtype] - _last_zero[0] = _fast_fill(0, shape, dtype) - return _last_zero[0] + cache_key = shape, dtype, device + cached = _zeros_cache.get(cache_key) + if cached is None: + cached = _fast_fill(0, shape, dtype) + _zeros_cache.put(cache_key, cached) + return cached def _ones(shape, dtype): diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index bfdd98819e..c09e2d8084 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -463,8 +463,7 @@ def assert_no_new_tensors(f): f(self, **kwargs) # Make an effort to clear caches, which would otherwise look like leaked # Tensors. - backprop._last_zero = [None] - backprop._shape_dtype = [None, None] + backprop._zeros_cache.flush() context.get_default_context().scalar_cache().clear() gc.collect() tensors_after = [ -- GitLab From a3496e14f7ca0f77b804b5be87cd43f919a7c09f Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Tue, 13 Feb 2018 12:24:02 -0800 Subject: [PATCH 129/629] Fix bug in populating the execution plan sent to the delegate. - memcpy was missing the array size. - modified the unit test to verify that the execution plan is trivial on first delegate invocation. PiperOrigin-RevId: 185569606 --- tensorflow/contrib/lite/interpreter.cc | 2 +- tensorflow/contrib/lite/interpreter_test.cc | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 6dea4e5916..028449211b 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -166,7 +166,7 @@ TfLiteStatus Interpreter::GetExecutionPlan(TfLiteIntArray** execution_plan) { static_assert(sizeof(plan_cache_->data[0]) == sizeof(execution_plan_[0]), "TfLiteIntArray and execution_plan do not contain same type."); memcpy(plan_cache_->data, execution_plan_.data(), - sizeof(plan_cache_->data[0])); + sizeof(plan_cache_->data[0]) * execution_plan_.size()); return kTfLiteOk; } diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 4b309748f7..28c96e5dde 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -773,6 +773,8 @@ class TestDelegate : public ::testing::Test { for (int exec_index = 0; exec_index < execution_plan->size; exec_index++) { int node_index = execution_plan->data[exec_index]; + // Check that we are an identity map to start. + TFLITE_CHECK_EQ(exec_index, node_index); TfLiteNode* node; TfLiteRegistration* reg; context->GetNodeAndRegistration(context, node_index, &node, ®); -- GitLab From dedafb73031c5588c1254e8fabd553031b15870a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 13 Feb 2018 12:31:59 -0800 Subject: [PATCH 130/629] Extract the filter and input shape for Conv2DBackpropFilter/Conv2DBackpropInput from the corresponding op inputs whenever possible. PiperOrigin-RevId: 185570750 --- .../grappler/costs/op_level_cost_estimator.cc | 63 ++++++++++++++----- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index 76db1afd4a..a57cfdd989 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -724,18 +724,35 @@ int64 OpLevelCostEstimator::CountConv2DBackpropInputOperations( bool* found_unknown_shapes) const { int64 ops = 0; - if (op_features.op() != kConv2dBackpropInput) { - LOG(ERROR) << "Invalid Operation"; + DCHECK_EQ(kConv2dBackpropInput, op_features.op()); + + if (op_features.inputs_size() < 2) { + *found_unknown_shapes = true; return ops; } - if (op_features.outputs_size() != 1) { - // Need _output_shapes for input shape. - LOG(ERROR) << "No output shape in Conv2DBackpropInput op."; - return ops; + TensorShapeProto input_shape; + if (op_features.inputs(0).has_value()) { + const TensorProto& value = op_features.inputs(0).value(); + if (value.int64_val_size() > 0) { + for (int i = 0; i < value.int64_val_size(); ++i) { + input_shape.add_dim()->set_size(value.int64_val(i)); + } + } else { + for (int i = 0; i < value.int_val_size(); ++i) { + input_shape.add_dim()->set_size(value.int_val(i)); + } + } + } else if (op_features.outputs_size() == 1) { + input_shape = op_features.outputs(0).shape(); + } else { + // Set the minimum filter size that's feasible. + for (int i = 0; i < 4; ++i) { + input_shape.add_dim()->set_size(1); + } + *found_unknown_shapes = true; } - const auto& input_shape = op_features.outputs(0).shape(); ConvolutionDimensions conv_dims = ConvolutionDimensionsFromInputs( input_shape, op_features.inputs(1).shape(), op_features, found_unknown_shapes); @@ -758,18 +775,34 @@ int64 OpLevelCostEstimator::CountConv2DBackpropFilterOperations( const OpInfo& op_features, ConvolutionDimensions* returned_conv_dims, bool* found_unknown_shapes) const { int64 ops = 0; - if (op_features.op() != kConv2dBackpropFilter) { - LOG(ERROR) << "Invalid Operation"; - return ops; + DCHECK_EQ(kConv2dBackpropFilter, op_features.op()); + + TensorShapeProto filter_shape; + if (op_features.inputs_size() >= 2 && op_features.inputs(1).has_value()) { + const TensorProto& value = op_features.inputs(1).value(); + if (value.int64_val_size() > 0) { + for (int i = 0; i < value.int64_val_size(); ++i) { + filter_shape.add_dim()->set_size(value.int64_val(i)); + } + } else { + for (int i = 0; i < value.int_val_size(); ++i) { + filter_shape.add_dim()->set_size(value.int_val(i)); + } + } + } else if (op_features.outputs_size() == 1) { + filter_shape = op_features.outputs(0).shape(); + } else { + // Set the minimum filter size that's feasible. + for (int i = 0; i < 4; ++i) { + filter_shape.add_dim()->set_size(1); + } + *found_unknown_shapes = true; } - if (op_features.outputs_size() != 1) { - // Need _output_shapes for input shape. - LOG(ERROR) << "No output shape in Conv2DBackpropFilter op."; + if (op_features.inputs_size() < 1) { + *found_unknown_shapes = true; return ops; } - - const auto& filter_shape = op_features.outputs(0).shape(); ConvolutionDimensions conv_dims = ConvolutionDimensionsFromInputs( op_features.inputs(0).shape(), filter_shape, op_features, found_unknown_shapes); -- GitLab From 14b3a42a42a488a32eae0951b9d4a92da2cd56a2 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 13 Feb 2018 12:45:27 -0800 Subject: [PATCH 131/629] Make LLVM IR dumps more readable Before this the IR we dumped out would contain constant tensors that were sometimes huge resulting in unweildy IR files. With this change we dump out a "noconst" IR file that has the constant initializers stripped out. PiperOrigin-RevId: 185572700 --- tensorflow/compiler/xla/service/llvm_ir/BUILD | 1 + .../compiler/xla/service/llvm_ir/llvm_util.cc | 41 +++++++++++++++---- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index ffc78bd5cf..37261ed1e6 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -54,6 +54,7 @@ cc_library( "@llvm//:core", "@llvm//:support", "@llvm//:target", + "@llvm//:transform_utils", ], ) diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 8d1e6338e1..15a4cf8532 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -23,6 +23,7 @@ limitations under the License. #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/name_uniquer.h" @@ -61,6 +62,15 @@ llvm::StringRef AsStringRef(tensorflow::StringPiece str) { return llvm::StringRef(str.data(), str.size()); } +std::unique_ptr DropConstantInitializers( + const llvm::Module& module) { + std::unique_ptr cloned_module = CloneModule(&module); + for (llvm::GlobalVariable& global_var : cloned_module->globals()) { + global_var.setInitializer(nullptr); + } + return cloned_module; +} + string DumpModuleToString(const llvm::Module& module) { std::string buffer_string; llvm::raw_string_ostream ostream(buffer_string); @@ -672,6 +682,19 @@ static string GetProcessUniqueIrFileName(tensorflow::StringPiece prefix) { return uniquer->GetUniqueName(prefix); } +static Status CreateAndWriteStringToFile(const string& directory_name, + const string& file_name, + const string& text) { + std::unique_ptr f; + TF_RETURN_IF_ERROR( + tensorflow::Env::Default()->RecursivelyCreateDir(directory_name)); + TF_RETURN_IF_ERROR( + tensorflow::Env::Default()->NewWritableFile(file_name, &f)); + TF_RETURN_IF_ERROR(f->Append(text)); + TF_RETURN_IF_ERROR(f->Close()); + return Status::OK(); +} + Status DumpIRToDirectory(const string& directory_name, const string& hlo_module_name, const llvm::Module& llvm_module, bool optimized) { @@ -686,13 +709,17 @@ Status DumpIRToDirectory(const string& directory_name, directory_name, tensorflow::strings::StrCat(unique_and_safe_file_name, ".ll")); - std::unique_ptr f; - TF_RETURN_IF_ERROR( - tensorflow::Env::Default()->RecursivelyCreateDir(directory_name)); - TF_RETURN_IF_ERROR( - tensorflow::Env::Default()->NewWritableFile(ir_file_name, &f)); - TF_RETURN_IF_ERROR(f->Append(DumpModuleToString(llvm_module))); - return f->Close(); + // For some models the embedded constants can be huge, so also dump the module + // with the constants stripped to get IR that is easier to manipulate. + string ir_no_constant_initializers_file_name = tensorflow::io::JoinPath( + directory_name, + tensorflow::strings::StrCat(unique_and_safe_file_name, "-noconst.ll")); + + TF_RETURN_IF_ERROR(CreateAndWriteStringToFile( + directory_name, ir_file_name, DumpModuleToString(llvm_module))); + return CreateAndWriteStringToFile( + directory_name, ir_no_constant_initializers_file_name, + DumpModuleToString(*DropConstantInitializers(llvm_module))); } llvm::Function* CreateFunction(llvm::FunctionType* function_type, -- GitLab From 21bce0c45ca10e2892d70dd84afed5101d76b4d0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 13:30:20 -0800 Subject: [PATCH 132/629] Add `Kumaraswamy` to distributions __init__.py PiperOrigin-RevId: 185578929 --- tensorflow/contrib/distributions/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 60a187e541..f0517142ab 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -40,6 +40,7 @@ from tensorflow.contrib.distributions.python.ops.geometric import * from tensorflow.contrib.distributions.python.ops.half_normal import * from tensorflow.contrib.distributions.python.ops.independent import * from tensorflow.contrib.distributions.python.ops.inverse_gamma import * +from tensorflow.contrib.distributions.python.ops.kumaraswamy import * from tensorflow.contrib.distributions.python.ops.logistic import * from tensorflow.contrib.distributions.python.ops.mixture import * from tensorflow.contrib.distributions.python.ops.mixture_same_family import * @@ -115,6 +116,7 @@ _allowed_symbols = [ 'Independent', 'InverseGamma', 'InverseGammaWithSoftplusConcentrationRate', + 'Kumaraswamy', 'Laplace', 'LaplaceWithSoftplusScale', 'Logistic', -- GitLab From 2ae7cec92783fb82a381bd7ac3b32d4b8ccf55dd Mon Sep 17 00:00:00 2001 From: Zhixian Yan Date: Tue, 13 Feb 2018 13:42:11 -0800 Subject: [PATCH 133/629] Internal Change PiperOrigin-RevId: 185580787 --- tensorflow/contrib/lite/testing/generate_examples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 6264daa988..a86c64849f 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -241,7 +241,7 @@ def create_tensor_data(dtype, shape, min_value=-100, max_value=100): if dtype in (tf.float32, tf.float16): value = (max_value-min_value)*np.random.random_sample(shape)+min_value elif dtype in (tf.int32, tf.uint8, tf.int64): - value = np.random.random_integers(min_value, max_value, shape) + value = np.random.randint(min_value, max_value+1, shape) return value.astype(dtype) -- GitLab From 435a71ef386c7b61cd0bef814f7ac63f0b68b028 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 13 Feb 2018 13:54:52 -0800 Subject: [PATCH 134/629] [TF:XLA] Bump open source llvm revision to r324990 PiperOrigin-RevId: 185582753 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 14a4281fae..ae45cf26af 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -473,11 +473,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/14498370e4dd4de99369b6129328ffcff737fc97.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/14498370e4dd4de99369b6129328ffcff737fc97.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/c6a263bfa2fcdb7b56ce17f650406c3313f5deb6.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/c6a263bfa2fcdb7b56ce17f650406c3313f5deb6.tar.gz", ], - sha256 = "ac8033652d5813f5454bcd32b9530169c1a8a523366e1e76315b319c99a91c83", - strip_prefix = "llvm-14498370e4dd4de99369b6129328ffcff737fc97", + sha256 = "0fcb58ff87c8ecad770a6db5138425d244fdc7aec710da0ae301c947166c53a9", + strip_prefix = "llvm-c6a263bfa2fcdb7b56ce17f650406c3313f5deb6", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From fcb7d92f70cfbbbea0c7ac11346b369fe5e6bd0c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 13:56:24 -0800 Subject: [PATCH 135/629] Use a more advanced py_func wrapper, one that allows non-tensor args to be passed directly to the wrapped function. PiperOrigin-RevId: 185583023 --- .../contrib/py2tf/converters/call_trees.py | 31 +------ .../py2tf/converters/call_trees_test.py | 23 +++++ tensorflow/contrib/py2tf/utils/BUILD | 12 +++ tensorflow/contrib/py2tf/utils/__init__.py | 1 + tensorflow/contrib/py2tf/utils/py_func.py | 69 ++++++++++++++ .../contrib/py2tf/utils/py_func_test.py | 91 +++++++++++++++++++ 6 files changed, 198 insertions(+), 29 deletions(-) create mode 100644 tensorflow/contrib/py2tf/utils/py_func.py create mode 100644 tensorflow/contrib/py2tf/utils/py_func_test.py diff --git a/tensorflow/contrib/py2tf/converters/call_trees.py b/tensorflow/contrib/py2tf/converters/call_trees.py index 60096d5a7b..1050ba654c 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees.py +++ b/tensorflow/contrib/py2tf/converters/call_trees.py @@ -28,10 +28,8 @@ import gast from tensorflow.contrib.py2tf.pyct import anno from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import qual_names from tensorflow.contrib.py2tf.pyct import templates from tensorflow.contrib.py2tf.pyct import transformer -from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno from tensorflow.python.util import tf_inspect @@ -198,36 +196,11 @@ class CallTreeTransformer(transformer.Base): return node def _wrap_to_py_func_no_return(self, node): - func_qn = anno.getanno(node.func, anno.Basic.QN) - args_scope = anno.getanno(node, NodeAnno.ARGS_SCOPE) - wrapper_name = self.context.namer.new_symbol(func_qn.ssf(), - args_scope.referenced) - wrapper_args = [] - for arg in node.args: - if anno.hasanno(arg, anno.Basic.QN): - arg_qn = anno.getanno(arg, anno.Basic.QN) - else: - arg_qn = qual_names.QN('arg') - wrapper_args.append( - self.context.namer.new_symbol(arg_qn.ssf(), args_scope.referenced)) # TODO(mdan): Properly handle varargs, kwargs, etc. - # TODO(mdan): This is best handled as a dynamic dispatch. - # That way we can separate tensors from non-tensor args. template = """ - def wrapper(wrapper_args): - call(wrapper_args) - return 1 - tf.py_func(wrapper, original_args, [tf.int64]) + py2tf_utils.wrap_py_func(func, None, (original_args,), True) """ - wrapper_def, call_expr = templates.replace( - template, - call=node.func, - wrapper=wrapper_name, - original_args=gast.List(elts=node.args, ctx=None), - wrapper_args=wrapper_args) - anno.setanno(wrapper_def, anno.Basic.SKIP_PROCESSING, True) - - return (wrapper_def, call_expr) + return templates.replace(template, func=node.func, original_args=node.args) def _function_is_compilable(self, target_entity): # TODO(mdan): This is just a placeholder. Implement. diff --git a/tensorflow/contrib/py2tf/converters/call_trees_test.py b/tensorflow/contrib/py2tf/converters/call_trees_test.py index 18a5c1e6e3..777648dc0b 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees_test.py +++ b/tensorflow/contrib/py2tf/converters/call_trees_test.py @@ -66,6 +66,29 @@ class CallTreesTest(converter_test_base.TestCase): tc = TestClass() self.assertEquals(3, result.test_fn_2(tc, 1)) + def test_py_func_wrap_no_retval(self): + + def test_fn(a): + setattr(a, 'foo', 'bar') + + node = self.parse_and_analyze(test_fn, {'setattr': setattr}) + node = call_trees.transform(node, self.ctx, (), ()) + + with self.compiled(node) as result: + with self.test_session() as sess: + # The function has no return value, so we do some tricks to grab the + # generated py_func node and ensure its effect only happens at graph + # execution. + + class Dummy(object): + pass + + a = Dummy() + result.test_fn(a) + self.assertFalse(hasattr(a, 'foo')) + sess.run(sess.graph.get_operations()[0]) + self.assertEquals('bar', a.foo) + def test_uncompiled_modules(self): def test_fn(a): diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index c2987fcace..74853e50f7 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -23,11 +23,13 @@ py_library( "context_managers.py", "misc.py", "multiple_dispatch.py", + "py_func.py", "type_check.py", ], srcs_version = "PY2AND3", visibility = ["//tensorflow:__subpackages__"], deps = [ + "//tensorflow/python:script_ops", "@six_archive//:six", ], ) @@ -62,6 +64,16 @@ py_test( ], ) +py_test( + name = "py_func_test", + srcs = ["py_func_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":utils", + "//tensorflow/python:client_testlib", + ], +) + py_test( name = "type_check_test", srcs = ["type_check_test.py"], diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index 1cbb0e0029..838c29aafd 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -22,4 +22,5 @@ from tensorflow.contrib.py2tf.utils.context_managers import control_dependency_o from tensorflow.contrib.py2tf.utils.misc import alias_tensors from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_cond from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while +from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func from tensorflow.contrib.py2tf.utils.type_check import is_tensor diff --git a/tensorflow/contrib/py2tf/utils/py_func.py b/tensorflow/contrib/py2tf/utils/py_func.py new file mode 100644 index 0000000000..838872d092 --- /dev/null +++ b/tensorflow/contrib/py2tf/utils/py_func.py @@ -0,0 +1,69 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Pyfunc creation utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import script_ops + + +def wrap_py_func(f, return_dtypes, arguments, use_dummy_return=False): + """Helper that wraps a callable to py_func. + + The helper passes tensor arguments through the py_func interface. Non-tensor + arguments are allowed, and will be passed to f directly. Note that non-tensor + arguments are captured by f will not update every time the wrapper is + called (this is consistent with its argument list, which only includes + the tensor arguments). In general, it's safest not to reuse this wrapper. + + Args: + f: Callable + return_dtypes: DType, tuple, list or None, the data type for each of f's + return value. None if f has no return values or use_dummy_return is + True. + arguments: Arguments for f + use_dummy_return: If True, the function will return a dummy value of 1 + and discard its actual return value. + Returns: + The return values of f converted to tensor. + Raises: + ValueError: if the arguments are incorrect. + """ + + if return_dtypes and use_dummy_return: + raise ValueError('if use_dummy_return is True, return_dtypes must be empty') + + n = len(arguments) + arg_is_tensor = tuple(map(tensor_util.is_tensor, arguments)) + index_in_tensor_list = [0] * n + i = 0 + for j in range(n): + index_in_tensor_list[j] = i + if arg_is_tensor[j]: + i += 1 + + def f_wrapper(*tensor_args): + f_args = tuple(tensor_args[index_in_tensor_list[i]] + if arg_is_tensor[i] else arguments[i] for i in range(n)) + retval = f(*f_args) + return 1 if use_dummy_return else retval + + return script_ops.py_func( + f_wrapper, tuple(arguments[i] for i in range(n) if arg_is_tensor[i]), + dtypes.int64 if use_dummy_return else return_dtypes) diff --git a/tensorflow/contrib/py2tf/utils/py_func_test.py b/tensorflow/contrib/py2tf/utils/py_func_test.py new file mode 100644 index 0000000000..776b5309c6 --- /dev/null +++ b/tensorflow/contrib/py2tf/utils/py_func_test.py @@ -0,0 +1,91 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for wrap_py_func module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.py2tf.utils import py_func +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.platform import test + + +class PyFuncTest(test.TestCase): + + def test_wrap_py_func_simple(self): + + def test_fn(a, b, c): + return a + b + c + + with self.test_session() as sess: + tensor_1 = constant_op.constant(1) + self.assertEqual(3, + sess.run( + py_func.wrap_py_func(test_fn, dtypes.int64, + (1, tensor_1, 1)))) + self.assertEqual(3, + sess.run( + py_func.wrap_py_func(test_fn, dtypes.int64, + (1, 1, 1)))) + self.assertEqual(3, + sess.run( + py_func.wrap_py_func(test_fn, dtypes.int64, + (tensor_1, 1, tensor_1)))) + + def test_wrap_py_func_complex_args(self): + + class TestClass(object): + + def __init__(self): + self.foo = 5 + + def test_fn(a, b): + return a * b.foo + + with self.test_session() as sess: + self.assertEqual(35, + sess.run( + py_func.wrap_py_func(test_fn, dtypes.int64, + (7, TestClass())))) + self.assertEqual( + 35, + sess.run( + py_func.wrap_py_func(test_fn, dtypes.int64, + (constant_op.constant(7), TestClass())))) + + def test_wrap_py_func_dummy_return(self): + + side_counter = [0] + + def test_fn(_): + side_counter[0] += 1 + + with self.test_session() as sess: + self.assertEqual(1, + sess.run( + py_func.wrap_py_func(test_fn, None, (5,), True))) + self.assertEqual([1], side_counter) + self.assertEqual(1, + sess.run( + py_func.wrap_py_func(test_fn, None, + (constant_op.constant(5),), + True))) + self.assertEqual([2], side_counter) + + +if __name__ == '__main__': + test.main() -- GitLab From 19b3ef15a996b26b2dde13b3aef45c8e8af38614 Mon Sep 17 00:00:00 2001 From: resec Date: Wed, 14 Feb 2018 06:37:54 +0800 Subject: [PATCH 136/629] add not equal op to tf_op_files.txt (#14319) -- GitLab From 921b0adf8b93ccad54eb0a82e42ff4b742e176db Mon Sep 17 00:00:00 2001 From: DONGGEON LIM Date: Tue, 13 Feb 2018 23:38:11 +0100 Subject: [PATCH 137/629] Add label_wav_dir.py (#14847) * Add label_wav_dir.py * Modify label_wav_dir.py --- .../examples/speech_commands/label_wav_dir.py | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 tensorflow/examples/speech_commands/label_wav_dir.py diff --git a/tensorflow/examples/speech_commands/label_wav_dir.py b/tensorflow/examples/speech_commands/label_wav_dir.py new file mode 100644 index 0000000000..2f305359e3 --- /dev/null +++ b/tensorflow/examples/speech_commands/label_wav_dir.py @@ -0,0 +1,136 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +r"""Runs a trained audio graph against WAVE files and reports the results. + +The model, labels and .wav files specified in the arguments will be loaded, and +then the predictions from running the model against the audio data will be +printed to the console. This is a useful script for sanity checking trained +models, and as an example of how to use an audio model from Python. + +Here's an example of running it: + +python tensorflow/examples/speech_commands/label_wav_dir.py \ +--graph=/tmp/my_frozen_graph.pb \ +--labels=/tmp/speech_commands_train/conv_labels.txt \ +--wav_dir=/tmp/speech_dataset/left + +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import glob + +import tensorflow as tf + +# pylint: disable=unused-import +from tensorflow.contrib.framework.python.ops import audio_ops as contrib_audio +# pylint: enable=unused-import + +FLAGS = None + + +def load_graph(filename): + """Unpersists graph from file as default graph.""" + with tf.gfile.FastGFile(filename, 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def, name='') + + +def load_labels(filename): + """Read in labels, one label per line.""" + return [line.rstrip() for line in tf.gfile.GFile(filename)] + + +def run_graph(wav_dir, labels, input_layer_name, output_layer_name, + num_top_predictions): + """Runs the audio data through the graph and prints predictions.""" + with tf.Session() as sess: + # Feed the audio data as input to the graph. + # predictions will contain a two-dimensional array, where one + # dimension represents the input image count, and the other has + # predictions per class + for wav_path in glob.glob(wav_dir + "/*.wav"): + if not wav_path or not tf.gfile.Exists(wav_path): + tf.logging.fatal('Audio file does not exist %s', wav_path) + + with open(wav_path, 'rb') as wav_file: + wav_data = wav_file.read() + + softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name) + predictions, = sess.run(softmax_tensor, {input_layer_name: wav_data}) + + # Sort to show labels in order of confidence + print('\n%s' % (wav_path.split('/')[-1])) + top_k = predictions.argsort()[-num_top_predictions:][::-1] + for node_id in top_k: + human_string = labels[node_id] + score = predictions[node_id] + print('%s (score = %.5f)' % (human_string, score)) + + return 0 + + +def label_wav(wav_dir, labels, graph, input_name, output_name, how_many_labels): + """Loads the model and labels, and runs the inference to print predictions.""" + if not labels or not tf.gfile.Exists(labels): + tf.logging.fatal('Labels file does not exist %s', labels) + + if not graph or not tf.gfile.Exists(graph): + tf.logging.fatal('Graph file does not exist %s', graph) + + labels_list = load_labels(labels) + + # load graph, which is stored in the default session + load_graph(graph) + + run_graph(wav_dir, labels_list, input_name, output_name, how_many_labels) + + +def main(_): + """Entry point for script, converts flags to arguments.""" + label_wav(FLAGS.wav_dir, FLAGS.labels, FLAGS.graph, FLAGS.input_name, + FLAGS.output_name, FLAGS.how_many_labels) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '--wav_dir', type=str, default='', help='Audio file to be identified.') + parser.add_argument( + '--graph', type=str, default='', help='Model to use for identification.') + parser.add_argument( + '--labels', type=str, default='', help='Path to file containing labels.') + parser.add_argument( + '--input_name', + type=str, + default='wav_data:0', + help='Name of WAVE data input node in model.') + parser.add_argument( + '--output_name', + type=str, + default='labels_softmax:0', + help='Name of node outputting a prediction in the model.') + parser.add_argument( + '--how_many_labels', + type=int, + default=3, + help='Number of results to show.') + + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) -- GitLab From 89e7941fe6fe85af4fb7fe5499871d6b9d1c36ab Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Tue, 13 Feb 2018 14:51:25 -0800 Subject: [PATCH 138/629] [TF-XLA] Disable Tensorflow's CSE in xla compiler No need to do CSE in TF-XLA bridge, as XLA already has its own CSE pass later in the compilation pipeline. This removes one source of nondeterminism. RELNOTES: CSE pass from Tensorflow is now disabled in XLA. PiperOrigin-RevId: 185592383 --- tensorflow/compiler/tf2xla/xla_compiler.cc | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index c5b4ec5b15..59e8830442 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -153,7 +153,8 @@ std::unique_ptr XlaCompiler::GetGraph(const FunctionBody* fbody) { std::unique_ptr graph(new Graph(options_.flib_def)); CopyGraph(*fbody->graph, graph.get()); OptimizerOptions opts; - opts.set_do_common_subexpression_elimination(true); + opts.set_opt_level(OptimizerOptions::L0); + opts.set_do_common_subexpression_elimination(false); opts.set_do_function_inlining(true); opts.set_do_constant_folding(true); GraphOptimizer optimizer(opts); @@ -184,8 +185,7 @@ Status XlaCompiler::CompileFunction(const XlaCompiler::CompileOptions& options, CheckSignature(fbody->arg_types, args), "Signature check failure while compiling: ", function.name()); - std::unique_ptr graph(new Graph(options_.flib_def)); - CopyGraph(*fbody->graph, graph.get()); + std::unique_ptr graph = GetGraph(fbody); // _Arg and _Retval nodes don't exist in the stored subgraph for the function; // they are added by the function body looked up. Therefore, they don't have @@ -213,15 +213,6 @@ Status XlaCompiler::CompileFunction(const XlaCompiler::CompileOptions& options, *graph); } - // Optimize the graph before running the compiler. - OptimizerOptions opts; - opts.set_do_common_subexpression_elimination(true); - opts.set_do_function_inlining(true); - opts.set_do_constant_folding(true); - GraphOptimizer optimizer(opts); - optimizer.Optimize(flib_runtime_, flib_runtime_->env(), - /*device=*/nullptr, &graph, /*shape_map=*/nullptr); - VLOG(1) << "===================================================="; TF_RETURN_IF_ERROR( CompileGraph(options, function_id, std::move(graph), args, result)); -- GitLab From 858ce1bccd9dd084ed9cb35eb5629e3a349cc7c2 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Tue, 13 Feb 2018 14:52:31 -0800 Subject: [PATCH 139/629] Code cleanup: Made Executor related API take std::unique_ptr instead of const Graph* as input. PiperOrigin-RevId: 185592574 --- .../core/common_runtime/direct_session.cc | 2 +- tensorflow/core/common_runtime/executor.cc | 22 ++-- tensorflow/core/common_runtime/executor.h | 8 +- tensorflow/core/common_runtime/function.cc | 2 +- .../core/common_runtime/function_test.cc | 6 +- .../core/common_runtime/graph_runner.cc | 14 +-- .../kernel_benchmark_testlib.cc | 6 +- .../core/distributed_runtime/executor_test.cc | 102 +++++++++--------- .../core/distributed_runtime/graph_mgr.cc | 2 +- 9 files changed, 83 insertions(+), 81 deletions(-) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index df6f4b8877..ecbffcbf6c 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -1250,7 +1250,7 @@ Status DirectSession::GetOrCreateExecutors( item->device = device; Executor* executor; TF_RETURN_IF_ERROR( - NewLocalExecutor(params, partition_graph.release(), &executor)); + NewLocalExecutor(params, std::move(partition_graph), &executor)); item->executor.reset(executor); } diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 6998cbecee..b06b75d658 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -332,8 +332,8 @@ class GraphView { class ExecutorImpl : public Executor { public: - ExecutorImpl(const LocalExecutorParams& p, const Graph* g) - : params_(p), graph_(g), gview_() { + ExecutorImpl(const LocalExecutorParams& p, std::unique_ptr g) + : params_(p), graph_(std::move(g)), gview_() { CHECK(p.create_kernel != nullptr); CHECK(p.delete_kernel != nullptr); } @@ -348,7 +348,6 @@ class ExecutorImpl : public Executor { for (auto fiter : frame_info_) { delete fiter.second; } - delete graph_; } Status Initialize(); @@ -412,7 +411,7 @@ class ExecutorImpl : public Executor { // Owned. LocalExecutorParams params_; - const Graph* graph_; + std::unique_ptr graph_; GraphView gview_; // A cached value of params_ @@ -605,11 +604,11 @@ void GetMaxPendingCounts(const Node* n, size_t* max_pending, } Status ExecutorImpl::Initialize() { - gview_.Initialize(graph_); + gview_.Initialize(graph_.get()); // Build the information about frames in this subgraph. ControlFlowInfo cf_info; - TF_RETURN_IF_ERROR(BuildControlFlowInfo(graph_, &cf_info)); + TF_RETURN_IF_ERROR(BuildControlFlowInfo(graph_.get(), &cf_info)); // Cache this value so we make this virtual function call once, rather // that O(# steps * # nodes per step) times. @@ -676,9 +675,9 @@ Status ExecutorImpl::Initialize() { // Initialize PendingCounts only after item->pending_id is initialized for // all nodes. - InitializePending(graph_, cf_info); + InitializePending(graph_.get(), cf_info); - return gview_.SetAllocAttrs(graph_, params_.device); + return gview_.SetAllocAttrs(graph_.get(), params_.device); } Status GraphView::SetAllocAttrs(const Graph* g, const Device* device) { @@ -1415,7 +1414,7 @@ void ExecutorImpl::InitializePending(const Graph* graph, } void ExecutorState::RunAsync(Executor::DoneCallback done) { - const Graph* graph = impl_->graph_; + const Graph* graph = impl_->graph_.get(); TaggedNodeSeq ready; // Ask the device to fill in the device context map. @@ -2606,9 +2605,10 @@ void ExecutorImpl::RunAsync(const Args& args, DoneCallback done) { } // end namespace -Status NewLocalExecutor(const LocalExecutorParams& params, const Graph* graph, +Status NewLocalExecutor(const LocalExecutorParams& params, + std::unique_ptr graph, Executor** executor) { - ExecutorImpl* impl = new ExecutorImpl(params, graph); + ExecutorImpl* impl = new ExecutorImpl(params, std::move(graph)); const Status s = impl->Initialize(); if (s.ok()) { *executor = impl; diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h index 3fd932da5b..adf80a2417 100644 --- a/tensorflow/core/common_runtime/executor.h +++ b/tensorflow/core/common_runtime/executor.h @@ -122,9 +122,8 @@ class Executor { // Creates an Executor that computes the given "graph". // -// If successful, returns the constructed executor in "*executor". The -// caller keeps the ownership of "device". The returned executor takes -// the ownership of "graph". Otherwise, returns an error status. +// If successful, returns the constructed executor in "*executor". Otherwise, +// returns an error status. // // "params" provides a set of context for the executor. We expect that // different context would provide different implementations. @@ -143,7 +142,8 @@ struct LocalExecutorParams { Executor::Args::NodeOutputsCallback node_outputs_cb; }; ::tensorflow::Status NewLocalExecutor(const LocalExecutorParams& params, - const Graph* graph, Executor** executor); + std::unique_ptr graph, + Executor** executor); // A class to help run multiple executors in parallel and wait until // all of them are complete. diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index d349d2bb12..b941819838 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -631,7 +631,7 @@ Status FunctionLibraryRuntimeImpl::CreateItem(Handle handle, Item** item) { }; Graph* graph = g.get(); Executor* exec; - TF_RETURN_IF_ERROR(NewLocalExecutor(params, g.release(), &exec)); + TF_RETURN_IF_ERROR(NewLocalExecutor(params, std::move(g), &exec)); { // Guard item since it is already inserted in items_. diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc index 8b05146299..63ad0d231c 100644 --- a/tensorflow/core/common_runtime/function_test.cc +++ b/tensorflow/core/common_runtime/function_test.cc @@ -71,11 +71,11 @@ class FunctionTest : public ::testing::Test { arg_types_ = result.arg_types; ret_types_ = result.ret_types; - Graph* g = new Graph(OpRegistry::Global()); + std::unique_ptr g(new Graph(OpRegistry::Global())); GraphConstructorOptions opts; opts.allow_internal_ops = true; opts.expect_device_spec = false; - TF_CHECK_OK(ConvertNodeDefsToGraph(opts, result.nodes, g)); + TF_CHECK_OK(ConvertNodeDefsToGraph(opts, result.nodes, g.get())); const int version = g->versions().producer(); LocalExecutorParams params; @@ -89,7 +89,7 @@ class FunctionTest : public ::testing::Test { DeleteNonCachedKernel(kernel); }; Executor* exec; - TF_CHECK_OK(NewLocalExecutor(params, g, &exec)); + TF_CHECK_OK(NewLocalExecutor(params, std::move(g), &exec)); exec_.reset(exec); } diff --git a/tensorflow/core/common_runtime/graph_runner.cc b/tensorflow/core/common_runtime/graph_runner.cc index a21304f7ef..f1082a6003 100644 --- a/tensorflow/core/common_runtime/graph_runner.cc +++ b/tensorflow/core/common_runtime/graph_runner.cc @@ -156,21 +156,21 @@ Status GraphRunner::Run(Graph* graph, FunctionLibraryRuntime* function_library, // should not be running expensive operators. auto runner = [](Executor::Args::Closure c) { c(); }; - // Take ownership and pass to NewLocalExecutor - Graph* g = graph_to_run.release(); - LocalExecutorParams params; // The ownership of the output tensors are bound to this device's lifetime. params.device = cpu_device_.get(); params.function_library = function_library; - params.create_kernel = [this, g](const NodeDef& ndef, OpKernel** kernel) { - return CreateNonCachedKernel(cpu_device_.get(), nullptr, ndef, - g->versions().producer(), kernel); + const int producer = graph_to_run->versions().producer(); + params.create_kernel = [this, producer](const NodeDef& ndef, + OpKernel** kernel) { + return CreateNonCachedKernel(cpu_device_.get(), nullptr, ndef, producer, + kernel); }; params.delete_kernel = [](OpKernel* kernel) { delete kernel; }; Executor* executor; - TF_RETURN_IF_ERROR(NewLocalExecutor(params, g, &executor)); + TF_RETURN_IF_ERROR( + NewLocalExecutor(params, std::move(graph_to_run), &executor)); std::unique_ptr executor_unref(executor); Executor::Args args; diff --git a/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc b/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc index 420dfe338e..64d8849475 100644 --- a/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc +++ b/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc @@ -39,6 +39,7 @@ limitations under the License. namespace tensorflow { namespace test { +// TODO(hongm): Convert `g` and `init` to using std::unique_ptr. Benchmark::Benchmark(const string& device, Graph* g, const SessionOptions* options, Graph* init, Rendezvous* rendez) { @@ -85,7 +86,8 @@ Benchmark::Benchmark(const string& device, Graph* g, if (init) { Executor* init_exec; - TF_CHECK_OK(NewLocalExecutor(params, init, &init_exec)); + TF_CHECK_OK( + NewLocalExecutor(params, std::unique_ptr(init), &init_exec)); Executor::Args args; args.rendezvous = rendez_; args.runner = runner; @@ -93,7 +95,7 @@ Benchmark::Benchmark(const string& device, Graph* g, delete init_exec; } - TF_CHECK_OK(NewLocalExecutor(params, g, &exec_)); + TF_CHECK_OK(NewLocalExecutor(params, std::unique_ptr(g), &exec_)); } Benchmark::~Benchmark() { diff --git a/tensorflow/core/distributed_runtime/executor_test.cc b/tensorflow/core/distributed_runtime/executor_test.cc index 5b115f9a4d..e34224205b 100644 --- a/tensorflow/core/distributed_runtime/executor_test.cc +++ b/tensorflow/core/distributed_runtime/executor_test.cc @@ -57,7 +57,7 @@ class ExecutorTest : public ::testing::Test { } // Resets executor_ with a new executor based on a graph 'gdef'. - void Create(const Graph* graph) { + void Create(std::unique_ptr graph) { const int version = graph->versions().producer(); LocalExecutorParams params; params.device = device_; @@ -69,7 +69,7 @@ class ExecutorTest : public ::testing::Test { DeleteNonCachedKernel(kernel); }; delete exec_; - TF_CHECK_OK(NewLocalExecutor(params, graph, &exec_)); + TF_CHECK_OK(NewLocalExecutor(params, std::move(graph), &exec_)); runner_ = [this](std::function fn) { thread_pool_->Schedule(fn); }; rendez_ = NewLocalRendezvous(); } @@ -144,12 +144,12 @@ Rendezvous::ParsedKey Key(const string& sender, const uint64 incarnation, TEST_F(ExecutorTest, SimpleAdd) { // c = a + b - Graph* g = new Graph(OpRegistry::Global()); - auto in0 = test::graph::Recv(g, "a", "float", ALICE, 1, BOB); - auto in1 = test::graph::Recv(g, "b", "float", ALICE, 1, BOB); - auto tmp = test::graph::Add(g, in0, in1); - test::graph::Send(g, tmp, "c", BOB, 1, ALICE); - Create(g); + std::unique_ptr g(new Graph(OpRegistry::Global())); + auto in0 = test::graph::Recv(g.get(), "a", "float", ALICE, 1, BOB); + auto in1 = test::graph::Recv(g.get(), "b", "float", ALICE, 1, BOB); + auto tmp = test::graph::Add(g.get(), in0, in1); + test::graph::Send(g.get(), tmp, "c", BOB, 1, ALICE); + Create(std::move(g)); Rendezvous::Args args; TF_ASSERT_OK(rendez_->Send(Key(ALICE, kIncarnation, BOB, "a"), args, V(1.0), false)); // in0 = 1.0 @@ -172,15 +172,15 @@ TEST_F(ExecutorTest, SelfAdd) { // // b <- v10 // All nodes are executed by one thread. - Graph* g = new Graph(OpRegistry::Global()); - auto v = test::graph::Recv(g, "a", "float", ALICE, 1, BOB); + std::unique_ptr g(new Graph(OpRegistry::Global())); + auto v = test::graph::Recv(g.get(), "a", "float", ALICE, 1, BOB); const int N = 10; for (int i = 1; i <= N; ++i) { - v = test::graph::Add(g, v, v); + v = test::graph::Add(g.get(), v, v); } // out <- v10 - test::graph::Send(g, v, "b", BOB, 1, ALICE); - Create(g); + test::graph::Send(g.get(), v, "b", BOB, 1, ALICE); + Create(std::move(g)); Rendezvous::Args args; // a = 1.0 TF_ASSERT_OK( @@ -229,9 +229,9 @@ void BuildTree(int N, Graph* g) { } TEST_F(ExecutorTest, RandomTree) { - Graph* g = new Graph(OpRegistry::Global()); - BuildTree(4096, g); - Create(g); + std::unique_ptr g(new Graph(OpRegistry::Global())); + BuildTree(4096, g.get()); + Create(std::move(g)); Rendezvous::Args args; TF_ASSERT_OK( rendez_->Send(Key(ALICE, kIncarnation, BOB, "a"), args, V(1.0), false)); @@ -262,9 +262,9 @@ void BuildConcurrentAddAssign(Graph* g) { #ifndef THREAD_SANITIZER TEST_F(ExecutorTest, ConcurrentAddAssign) { - Graph* g = new Graph(OpRegistry::Global()); - BuildConcurrentAddAssign(g); - Create(g); + std::unique_ptr g(new Graph(OpRegistry::Global())); + BuildConcurrentAddAssign(g.get()); + Create(std::move(g)); for (int iters = 0; iters < 16; ++iters) { Rendezvous* rendez = NewLocalRendezvous(); TF_ASSERT_OK(Run(rendez)); @@ -281,12 +281,12 @@ TEST_F(ExecutorTest, ConcurrentAddAssign) { #endif TEST_F(ExecutorTest, SimpleSwitchLive) { - Graph* g = new Graph(OpRegistry::Global()); - auto in0 = test::graph::Recv(g, "a", "float", ALICE, 1, BOB); - auto in1 = test::graph::Constant(g, VB(false)); - auto tmp = test::graph::Switch(g, in0, in1); - test::graph::Send(g, tmp, "c", BOB, 1, ALICE); - Create(g); + std::unique_ptr g(new Graph(OpRegistry::Global())); + auto in0 = test::graph::Recv(g.get(), "a", "float", ALICE, 1, BOB); + auto in1 = test::graph::Constant(g.get(), VB(false)); + auto tmp = test::graph::Switch(g.get(), in0, in1); + test::graph::Send(g.get(), tmp, "c", BOB, 1, ALICE); + Create(std::move(g)); Rendezvous::Args args; TF_ASSERT_OK(rendez_->Send(Key(ALICE, kIncarnation, BOB, "a"), args, V(1.0), false)); // in0 = 1.0 @@ -300,12 +300,12 @@ TEST_F(ExecutorTest, SimpleSwitchLive) { } TEST_F(ExecutorTest, SimpleSwitchDead) { - Graph* g = new Graph(OpRegistry::Global()); - auto in0 = test::graph::Recv(g, "a", "float", ALICE, 1, BOB); - auto in1 = test::graph::Constant(g, VB(true)); - auto tmp = test::graph::Switch(g, in0, in1); - test::graph::Send(g, tmp, "c", BOB, 1, ALICE); - Create(g); + std::unique_ptr g(new Graph(OpRegistry::Global())); + auto in0 = test::graph::Recv(g.get(), "a", "float", ALICE, 1, BOB); + auto in1 = test::graph::Constant(g.get(), VB(true)); + auto tmp = test::graph::Switch(g.get(), in0, in1); + test::graph::Send(g.get(), tmp, "c", BOB, 1, ALICE); + Create(std::move(g)); Rendezvous::Args args; TF_ASSERT_OK(rendez_->Send(Key(ALICE, kIncarnation, BOB, "a"), args, V(1.0), false)); // in0 = 1.0 @@ -319,16 +319,16 @@ TEST_F(ExecutorTest, SimpleSwitchDead) { TEST_F(ExecutorTest, Abort) { // e = a + b + c + d - Graph* g = new Graph(OpRegistry::Global()); - auto in0 = test::graph::Recv(g, "a", "float", ALICE, 1, BOB); - auto in1 = test::graph::Recv(g, "b", "float", ALICE, 1, BOB); - auto in2 = test::graph::Recv(g, "c", "float", ALICE, 1, BOB); - auto in3 = test::graph::Recv(g, "d", "float", ALICE, 1, BOB); - auto add0 = test::graph::Add(g, in0, in1); - auto add1 = test::graph::Add(g, in2, in3); - auto add2 = test::graph::Add(g, add0, add1); - test::graph::Send(g, add2, "e", BOB, 1, ALICE); - Create(g); + std::unique_ptr g(new Graph(OpRegistry::Global())); + auto in0 = test::graph::Recv(g.get(), "a", "float", ALICE, 1, BOB); + auto in1 = test::graph::Recv(g.get(), "b", "float", ALICE, 1, BOB); + auto in2 = test::graph::Recv(g.get(), "c", "float", ALICE, 1, BOB); + auto in3 = test::graph::Recv(g.get(), "d", "float", ALICE, 1, BOB); + auto add0 = test::graph::Add(g.get(), in0, in1); + auto add1 = test::graph::Add(g.get(), in2, in3); + auto add2 = test::graph::Add(g.get(), add0, add1); + test::graph::Send(g.get(), add2, "e", BOB, 1, ALICE); + Create(std::move(g)); // Needs 4 inputs (recv). One of them is aborted. rendez_->Ref(); @@ -371,17 +371,17 @@ TEST_F(ExecutorTest, Abort) { } TEST_F(ExecutorTest, RecvInvalidDtype) { - Graph* g = new Graph(OpRegistry::Global()); + std::unique_ptr g(new Graph(OpRegistry::Global())); // An input vector of type float of size 1. - auto one = test::graph::Recv(g, "one", "float", ALICE, 1, BOB); + auto one = test::graph::Recv(g.get(), "one", "float", ALICE, 1, BOB); // A floating point variable vector of size 1. - auto var = test::graph::Var(g, DT_FLOAT, TensorShape({1})); + auto var = test::graph::Var(g.get(), DT_FLOAT, TensorShape({1})); // Initialize the variable with input. - auto init = test::graph::Assign(g, var, one); + auto init = test::graph::Assign(g.get(), var, one); // Output - auto* two = test::graph::Send(g, var, "two", BOB, 1, ALICE); + auto* two = test::graph::Send(g.get(), var, "two", BOB, 1, ALICE); g->AddControlEdge(init, two); // Ensures run after init. - Create(g); + Create(std::move(g)); Rendezvous* rendez = NewLocalRendezvous(); // Send a double instead of float. TF_ASSERT_OK(rendez->Send(Key(ALICE, 1, BOB, "one"), Rendezvous::Args(), @@ -396,11 +396,11 @@ TEST_F(ExecutorTest, RecvInvalidDtype) { } TEST_F(ExecutorTest, RecvInvalidRefDtype) { - Graph* g = new Graph(OpRegistry::Global()); + std::unique_ptr g(new Graph(OpRegistry::Global())); // A var that always produces as invalid dtype. - auto var = test::graph::InvalidRefType(g, DT_FLOAT, DT_DOUBLE); - test::graph::Send(g, var, "out", BOB, 1, ALICE); - Create(g); + auto var = test::graph::InvalidRefType(g.get(), DT_FLOAT, DT_DOUBLE); + test::graph::Send(g.get(), var, "out", BOB, 1, ALICE); + Create(std::move(g)); Rendezvous* rendez = NewLocalRendezvous(); EXPECT_TRUE(errors::IsInternal(Run(rendez))); Tensor output; diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index 0120f612ac..7878ebb5f0 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -271,7 +271,7 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef, skip_cost_models_ = false; } TF_RETURN_IF_ERROR( - NewLocalExecutor(params, subgraph.release(), &unit->root)); + NewLocalExecutor(params, std::move(subgraph), &unit->root)); } return Status::OK(); } -- GitLab From dcfa59b8c32e44bcd16fbd58ce91e7d3f332be36 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 13 Feb 2018 14:55:24 -0800 Subject: [PATCH 140/629] Change linkage type of modules to external after dropping initializers It isn't legal to have private global variables without initializers. In the current state the -noconst.ll LLVM IR cannot be passed to opt. PiperOrigin-RevId: 185593073 --- tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 15a4cf8532..22141e7e00 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -20,6 +20,7 @@ limitations under the License. #include #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" #include "llvm/Target/TargetOptions.h" @@ -67,6 +68,7 @@ std::unique_ptr DropConstantInitializers( std::unique_ptr cloned_module = CloneModule(&module); for (llvm::GlobalVariable& global_var : cloned_module->globals()) { global_var.setInitializer(nullptr); + global_var.setLinkage(llvm::GlobalValue::LinkageTypes::ExternalLinkage); } return cloned_module; } -- GitLab From b2a0f1c45b2283910548ebd88ee5aaf4b6fc6077 Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Tue, 13 Feb 2018 18:20:07 -0500 Subject: [PATCH 141/629] Add instructions for building CUDA-enabled Android TensorFlow (#16961) * Add instructions for building CUDA-enabled Android TensorFlow * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md --- tensorflow/contrib/android/README.md | 5 ++ tensorflow/contrib/makefile/README.md | 99 +++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/tensorflow/contrib/android/README.md b/tensorflow/contrib/android/README.md index b8d73bf24c..db37bcf73d 100644 --- a/tensorflow/contrib/android/README.md +++ b/tensorflow/contrib/android/README.md @@ -81,6 +81,11 @@ For documentation on building a self-contained AAR file with cmake, see [tensorflow/contrib/android/cmake](cmake). +### Makefile + +For documentation on building native TF libraries with make, including a CUDA-enabled variant for devices like the Nvidia Shield TV, see [tensorflow/contrib/makefile/README.md](../makefile/README.md) + + ## AssetManagerFileSystem This directory also contains a TensorFlow filesystem supporting the Android diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 6959ca344f..b0228c5435 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -130,6 +130,105 @@ adb shell '/data/local/tmp/benchmark \ For more details, see the [benchmark documentation](../../tools/benchmark). +## CUDA support for Tegra devices running Android (Nvidia Shield TV, etc) + +With the release of TF 1.6 and JetPack for Android 3.2 (currently pending), you can now build a version of TensorFlow for compatible devices according to the following instructions which will receive the full benefits of GPU acceleration. + +#### Environment setup: + +First, download and install JetPack for Android version 3.2 or greater from [Nvidia](https://developers.nvidia.com). Note that as of the TF 1.6 release the JetPack for Android 3.2 release is still pending, and regular JetPack for L4T will not work. + +```bash +git clone https://github.com/tensorflow/tensorflow.git +cd tensorflow +JETPACK=$HOME/JetPack_Android_3.2 +TEGRA_LIBS="$JETPACK/cuDNN/aarch64/cuda/lib64/libcudnn.so $JETPACK/cuda-9.0/extras/CUPTI/lib64/libcupti.so $JETPACK/cuda/targets/aarch64-linux-androideabi/lib64/libcufft.so" +``` + +#### Building all CUDA-enabled native binaries: +This will build CUDA-enabled versions of libtensorflow_inference.so and the benchmark binary. (libtensorflow_demo.so will also be built incidentally, but it does not support CUDA) + +```bash +NDK_ROOT=$JETPACK/android-ndk-r13b +CC_PREFIX=ccache tensorflow/contrib/makefile/build_all_android.sh -s tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in -t "libtensorflow_inference.so libtensorflow_demo.so all" -a tegra +``` +(add -T on subsequent builds to skip protobuf downloading/building) + + +#### Testing the the CUDA-enabled benchmark via adb: +Build binaries first as above, then run: + +```bash +adb shell mkdir -p /data/local/tmp/lib64 +adb push $TEGRA_LIBS /data/local/tmp/lib64 +adb push tensorflow/contrib/makefile/gen/bin/android_arm64-v8a/benchmark /data/local/tmp +wget https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk +unzip tensorflow_demo.apk -d /tmp/tensorflow_demo +adb push /tmp/tensorflow_demo/assets/*.pb /data/local/tmp +adb shell "LD_LIBRARY_PATH=/data/local/tmp/lib64 /data/local/tmp/benchmark --graph=/data/local/tmp/tensorflow_inception_graph.pb" +``` + +#### Building the CUDA-enabled TensorFlow AAR with Bazel: +Build the native binaries first as above. Then, build the aar and package the native libs by executing the following: +```bash +mkdir -p /tmp/tf/jni/arm64-v8a +cp tensorflow/contrib/makefile/gen/lib/android_tegra/libtensorflow_*.so /tmp/tf/jni/arm64-v8a/ +cp $TEGRA_LIBS /tmp/tf/jni/arm64-v8a +bazel build //tensorflow/contrib/android:android_tensorflow_inference_java.aar +cp bazel-bin/tensorflow/contrib/android/android_tensorflow_inference_java.aar /tmp/tf/tensorflow.aar +cd /tmp/tf +chmod +w tensorflow.aar +zip -ur tensorflow.aar $(find jni -name *.so) +``` + +#### Building the CUDA-enabled TensorFlow Android demo with Bazel: +Build binaries first as above, then edit tensorflow/examples/android/BUILD and replace: +``` + srcs = [ + ":libtensorflow_demo.so", + "//tensorflow/contrib/android:libtensorflow_inference.so", + ], +``` +with: +``` +srcs = glob(["libs/arm64-v8a/*.so"]), +``` + +Then run: +```bash +# Create dir for native libs +mkdir -p tensorflow/examples/android/libs/arm64-v8a + +# Copy JetPack libs +cp $TEGRA_LIBS tensorflow/examples/android/libs/arm64-v8a + +# Copy native TensorFlow libraries +cp tensorflow/contrib/makefile/gen/lib/android_arm64-v8a/libtensorflow_*.so tensorflow/examples/android/libs/arm64-v8a/ + +# Build APK +bazel build -c opt --fat_apk_cpu=arm64-v8a tensorflow/android:tensorflow_demo + +# Install +adb install -r -f bazel-bin/tensorflow/examples/android/tensorflow_demo.apk +``` + +#### Building the CUDA-enabled Android demo with gradle/Android Studio: + +Add tensorflow/examples/android as an Android project in Android Studio as normal. + +Edit build.gradle and: +* set nativeBuildSystem = 'makefile' +* set cpuType = 'arm64-v8a' +* in "buildNativeMake", replace cpuType with 'tegra' (optional speedups like -T and ccache also work) +* set the environment "NDK_ROOT" var to $JETPACK/android-ndk-r13b + +Click "build apk" to build. + +Install: +```bash +adb install -r -f tensorflow/examples/android/gradleBuild/outputs/apk/debug/android-debug.apk +``` + ## iOS _Note: To use this library in an iOS application, see related instructions in -- GitLab From 9a0403542b3c67119e7097a491a2c5e2602b14c3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 15:29:00 -0800 Subject: [PATCH 142/629] Wire in support for XLA kConditional instruction. PiperOrigin-RevId: 185598764 --- .../compiler/xla/service/copy_insertion.cc | 3 +- .../compiler/xla/service/heap_simulator.cc | 1 + .../compiler/xla/service/hlo_computation.cc | 7 +- .../compiler/xla/service/hlo_computation.h | 8 + tensorflow/compiler/xla/service/hlo_module.cc | 15 ++ .../compiler/xla/service/hlo_ordering.cc | 16 ++ .../xla/service/hlo_rematerialization.cc | 1 + .../compiler/xla/service/layout_assignment.cc | 212 ++++++++++++------ .../xla/service/layout_assignment_test.cc | 63 ++++++ 9 files changed, 251 insertions(+), 75 deletions(-) diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index cd983bc03e..c812df4235 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -729,7 +729,8 @@ class CopyRemover { // has a different operand (the operand of the elided copy). for (const HloUse* copy_use : copy_value_node->uses) { operand_node->uses.push_back(copy_use); - if (copy_use->instruction->opcode() == HloOpcode::kCopy) { + if (copy_use->instruction->opcode() == HloOpcode::kCopy && + ContainsKey(copy_map_, copy_use->instruction)) { copy_map_.at(copy_use->instruction).src = operand_node; } } diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc index cde5877e29..a2d13c013c 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.cc +++ b/tensorflow/compiler/xla/service/heap_simulator.cc @@ -225,6 +225,7 @@ Status HeapSimulator::RunComputation( // sub-computations will never be run concurrently. if (module_sequence_ != nullptr) { if (instruction->opcode() == HloOpcode::kCall || + instruction->opcode() == HloOpcode::kConditional || instruction->opcode() == HloOpcode::kWhile) { for (const HloComputation* called_computation : instruction->called_computations()) { diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 5432419e4a..21e6b2ca73 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -509,13 +509,14 @@ StatusOr HloComputation::DeepCopyInstruction( "Can't deep copy instruction %s: instruction is not in computation %s", instruction->name().c_str(), name().c_str()); } - if (indices_to_copy != nullptr && !ShapeUtil::Compatible(instruction->shape(), indices_to_copy->shape())) { return FailedPrecondition( "Can't deep copy instruction %s: given shape tree of indices to copy " - "has incompatible shape", - instruction->name().c_str()); + "has incompatible shapes: %s vs. %s", + instruction->name().c_str(), + ShapeUtil::HumanString(instruction->shape()).c_str(), + ShapeUtil::HumanString(indices_to_copy->shape()).c_str()); } ShapeIndex index; diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 061c59abe5..39d864efcb 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -77,6 +77,14 @@ class HloComputation { return last_added_instruction_; } + Status ForEachInstruction( + const std::function& func) const { + for (const auto& instruction : instructions_) { + TF_RETURN_IF_ERROR(func(instruction.get())); + } + return Status::OK(); + } + private: const string name_; HloInstruction* last_added_instruction_; diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 60270b0595..1e0e17f22f 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -145,6 +145,21 @@ void HloModule::ReplaceComputations( } break; } + case HloOpcode::kConditional: { + HloComputation* new_true_computation = + tensorflow::gtl::FindWithDefault( + replacements, instruction->true_computation(), nullptr); + if (new_true_computation != nullptr) { + instruction->set_true_computation(new_true_computation); + } + HloComputation* new_false_computation = + tensorflow::gtl::FindWithDefault( + replacements, instruction->false_computation(), nullptr); + if (new_false_computation != nullptr) { + instruction->set_false_computation(new_false_computation); + } + break; + } case HloOpcode::kSelectAndScatter: { HloComputation* new_select = tensorflow::gtl::FindWithDefault( replacements, instruction->select(), nullptr); diff --git a/tensorflow/compiler/xla/service/hlo_ordering.cc b/tensorflow/compiler/xla/service/hlo_ordering.cc index 68e3c9618c..1b24d8da9e 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering.cc @@ -186,6 +186,22 @@ bool HloOrdering::UseIsBeforeValueDefinition( } } + if (use.instruction->opcode() == HloOpcode::kConditional) { + const HloInstruction* conditional = use.instruction; + if (call_graph_->InstructionIsNestedIn(value.defining_instruction(), + conditional->true_computation())) { + VLOG(4) << " use is conditional " << use.instruction->name() + << " and def is in TRUE computation"; + return true; + } + if (call_graph_->InstructionIsNestedIn(value.defining_instruction(), + conditional->false_computation())) { + VLOG(4) << " use is conditional " << use.instruction->name() + << " and def is in FALSE computation"; + return true; + } + } + VLOG(4) << " use is not before value"; return false; } diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index c6b4dc0368..98b8d34be1 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -60,6 +60,7 @@ bool IsRematerializable(const HloInstruction* instruction) { switch (instruction->opcode()) { case HloOpcode::kCall: case HloOpcode::kConstant: + case HloOpcode::kConditional: case HloOpcode::kCrossReplicaSum: case HloOpcode::kCustomCall: case HloOpcode::kParameter: diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index fce135ef61..0668f66051 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -53,6 +53,83 @@ limitations under the License. namespace xla { +// For now moving only one API here, but we should have a single top level +// anonymous namespace, instead of three or four spread all over this file. +namespace { + +// Creates and returns a copy of the given instruction with a different +// layout. Tuple-shaped instructions will be deep-copied, and the last Tuple +// instruction producing the copy is returned. +StatusOr CreateCopyWithNewLayout( + const Shape& shape_with_layout, HloInstruction* instruction) { + TF_RET_CHECK(LayoutUtil::HasLayout(shape_with_layout)); + DCHECK(ShapeUtil::Compatible(shape_with_layout, instruction->shape())) + << ShapeUtil::HumanString(shape_with_layout) << " " + << ShapeUtil::HumanString(instruction->shape()) + << " instruction: " << instruction->ToString(); + + if (ShapeUtil::IsTuple(instruction->shape())) { + // Deep-copy tuples. + std::vector element_copies; + for (int64 i = 0; i < ShapeUtil::TupleElementCount(instruction->shape()); + ++i) { + HloInstruction* gte = instruction->parent()->AddInstruction( + HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction, + i)); + + // Recurse to copy each elements. + TF_ASSIGN_OR_RETURN( + HloInstruction * element_copy, + CreateCopyWithNewLayout( + ShapeUtil::GetSubshape(shape_with_layout, {i}), gte)); + element_copies.push_back(element_copy); + } + // Gather element copies into a tuple with a new Tuple instruction. + HloInstruction* tuple_copy = instruction->parent()->AddInstruction( + HloInstruction::CreateTuple(element_copies)); + LayoutUtil::ClearLayout(tuple_copy->mutable_shape()); + TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( + shape_with_layout, tuple_copy->mutable_shape())); + return tuple_copy; + } else if (ShapeUtil::IsArray(instruction->shape())) { + HloInstruction* copy = + instruction->parent()->AddInstruction(HloInstruction::CreateUnary( + instruction->shape(), HloOpcode::kCopy, instruction)); + LayoutUtil::ClearLayout(copy->mutable_shape()); + TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( + shape_with_layout, copy->mutable_shape())); + + return copy; + } else { + return FailedPrecondition( + "Can only copy array and tuple shaped instructions"); + } +} + +// Creates a copy of the given operand if the operand's layout does not match +// the given layout. This copy replaces the use in the given instruction. Tuple +// operands will be deep-copied. +Status CopyOperandIfLayoutsDiffer(const ShapeLayout& operand_layout, + HloInstruction* instruction, + int64 operand_no) { + HloInstruction* operand = instruction->mutable_operand(operand_no); + TF_RET_CHECK(operand_layout.LayoutIsSet()); + TF_RET_CHECK(LayoutUtil::HasLayout(operand->shape())); + + if (ShapeUtil::Equal(operand_layout.shape(), operand->shape())) { + // Operand layout already matches our constraint. Nothing to do. + return Status::OK(); + } + + TF_ASSIGN_OR_RETURN(HloInstruction * operand_copy, + CreateCopyWithNewLayout(operand_layout.shape(), operand)); + + return instruction->ReplaceOperandWith(operand_no, operand_copy); +} + +} // namespace + std::ostream& operator<<(std::ostream& out, const LayoutConstraint& constraint) { out << constraint.ToString(); @@ -512,6 +589,36 @@ Status LayoutAssignment::AddMandatoryConstraints( body_layout.result_shape(), instruction)); TF_RETURN_IF_ERROR(constraints->SetOperandLayout( body_layout.result_shape(), instruction, 0)); + } else if (instruction->opcode() == HloOpcode::kConditional) { + // The layout of the true and false computations must match, and must + // be the layout of the kConditional instruction. + TF_RET_CHECK(instruction->operand_count() == 3); + + HloComputation* true_computation = instruction->true_computation(); + HloComputation* false_computation = instruction->false_computation(); + const HloInstruction* true_operand = instruction->operand(1); + const HloInstruction* false_operand = instruction->operand(2); + + TF_RET_CHECK(true_computation->num_parameters() == 1); + TF_RET_CHECK(false_computation->num_parameters() == 1); + ComputationLayout& true_computation_layout = + FindOrDie(computation_layouts_, true_computation); + ComputationLayout& false_computation_layout = + FindOrDie(computation_layouts_, false_computation); + + DCHECK(ShapeUtil::Compatible(true_operand->shape(), + true_computation_layout.parameter_shape(0))); + DCHECK(ShapeUtil::Compatible( + false_operand->shape(), false_computation_layout.parameter_shape(0))); + + TF_RETURN_IF_ERROR(constraints->SetInstructionLayout( + true_computation_layout.result_shape(), instruction)); + TF_RETURN_IF_ERROR(constraints->SetOperandLayout( + true_computation_layout.parameter_shape(0), instruction, 1, + /*mandatory=*/true)); + TF_RETURN_IF_ERROR(constraints->SetOperandLayout( + false_computation_layout.parameter_shape(0), instruction, 2, + /*mandatory=*/true)); } else if (instruction->opcode() == HloOpcode::kCustomCall) { if (!CustomCallRequiresMajorFirstLayout(instruction)) { continue; @@ -598,6 +705,33 @@ Status CheckWhileLayout(HloInstruction* while_inst, return Status::OK(); } +Status CheckConditionalLayout( + HloInstruction* instruction, + const ComputationLayout& true_computation_layout, + const ComputationLayout& false_computation_layout) { + HloComputation* true_computation = instruction->true_computation(); + HloComputation* false_computation = instruction->false_computation(); + const HloInstruction* true_operand = instruction->operand(1); + const HloInstruction* false_operand = instruction->operand(2); + + TF_RET_CHECK(true_computation_layout.result_layout() == + false_computation_layout.result_layout()); + TF_RET_CHECK(true_computation_layout.result_layout().MatchesLayoutInShape( + instruction->shape())); + TF_RET_CHECK(true_computation_layout.result_layout().MatchesLayoutInShape( + true_computation->root_instruction()->shape())); + TF_RET_CHECK(false_computation_layout.result_layout().MatchesLayoutInShape( + instruction->shape())); + TF_RET_CHECK(false_computation_layout.result_layout().MatchesLayoutInShape( + false_computation->root_instruction()->shape())); + TF_RET_CHECK(true_computation_layout.parameter_layout(0).MatchesLayoutInShape( + true_operand->shape())); + TF_RET_CHECK( + false_computation_layout.parameter_layout(0).MatchesLayoutInShape( + false_operand->shape())); + return Status::OK(); +} + // Fusion parameters must match the layout of the fusion instructions operands, // and the root of the fusion expression must match the layout of the fusion // instruction. @@ -710,6 +844,13 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { FindOrDie(computation_layouts_, instruction->while_condition()), FindOrDie(computation_layouts_, instruction->while_body()))); break; + case HloOpcode::kConditional: + TF_RETURN_IF_ERROR(CheckConditionalLayout( + instruction, + FindOrDie(computation_layouts_, instruction->true_computation()), + FindOrDie(computation_layouts_, + instruction->false_computation()))); + break; default: break; } @@ -1165,77 +1306,6 @@ StatusOr InferArrayLayout( return *first_buffer_layout; } -// Creates and returns a copy of the given instruction with a different -// layout. Tuple-shaped instructions will be deep-copied, and the last Tuple -// instruction producing the copy is returned. -StatusOr CreateCopyWithNewLayout( - const Shape& shape_with_layout, HloInstruction* instruction) { - TF_RET_CHECK(LayoutUtil::HasLayout(shape_with_layout)); - DCHECK(ShapeUtil::Compatible(shape_with_layout, instruction->shape())) - << ShapeUtil::HumanString(shape_with_layout) << " " - << ShapeUtil::HumanString(instruction->shape()) - << " instruction: " << instruction->ToString(); - - if (ShapeUtil::IsTuple(instruction->shape())) { - // Deep-copy tuples. - std::vector element_copies; - for (int64 i = 0; i < ShapeUtil::TupleElementCount(instruction->shape()); - ++i) { - HloInstruction* gte = instruction->parent()->AddInstruction( - HloInstruction::CreateGetTupleElement( - ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction, - i)); - - // Recurse to copy each elements. - TF_ASSIGN_OR_RETURN( - HloInstruction * element_copy, - CreateCopyWithNewLayout( - ShapeUtil::GetSubshape(shape_with_layout, {i}), gte)); - element_copies.push_back(element_copy); - } - // Gather element copies into a tuple with a new Tuple instruction. - HloInstruction* tuple_copy = instruction->parent()->AddInstruction( - HloInstruction::CreateTuple(element_copies)); - LayoutUtil::ClearLayout(tuple_copy->mutable_shape()); - TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( - shape_with_layout, tuple_copy->mutable_shape())); - return tuple_copy; - } else if (ShapeUtil::IsArray(instruction->shape())) { - HloInstruction* copy = - instruction->parent()->AddInstruction(HloInstruction::CreateUnary( - instruction->shape(), HloOpcode::kCopy, instruction)); - LayoutUtil::ClearLayout(copy->mutable_shape()); - TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( - shape_with_layout, copy->mutable_shape())); - - return copy; - } else { - return FailedPrecondition( - "Can only copy array and tuple shaped instructions"); - } -} - -// Creates a copy of the given operand if the operand's layout does not match -// the given layout. This copy replaces the use in the given instruction. Tuple -// operands will be deep-copied. -Status CopyOperandIfLayoutsDiffer(const ShapeLayout& operand_layout, - HloInstruction* instruction, - int64 operand_no) { - HloInstruction* operand = instruction->mutable_operand(operand_no); - TF_RET_CHECK(operand_layout.LayoutIsSet()); - TF_RET_CHECK(LayoutUtil::HasLayout(operand->shape())); - - if (ShapeUtil::Equal(operand_layout.shape(), operand->shape())) { - // Operand layout already matches our constraint. Nothing to do. - return Status::OK(); - } - - TF_ASSIGN_OR_RETURN(HloInstruction * operand_copy, - CreateCopyWithNewLayout(operand_layout.shape(), operand)); - - return instruction->ReplaceOperandWith(operand_no, operand_copy); -} - // For fusion instructions, set the layout of each fused parameter instruction // to match the layout of its corresponding fusion instruction operand. Also, // set the layout of the fused root to match the layout of the fusion diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index e269a13459..dd0fba2758 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -658,5 +658,68 @@ TEST_F(LayoutAssignmentTest, GTEInheritsLayoutFromOperand) { ElementsAre(2, 1, 0)); } +TEST_F(LayoutAssignmentTest, ConditionalAsymmetricLayout) { + auto builder = HloComputation::Builder(TestName()); + auto module = CreateNewModule(); + Shape shape = ShapeUtil::MakeShape(F32, {128, 8}); + Shape tshape = ShapeUtil::MakeTupleShape({shape, shape}); + Shape result_tshape = ShapeUtil::MakeTupleShape({shape}); + + auto param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param0")); + auto param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, shape, "param1")); + auto pred = builder.AddInstruction(HloInstruction::CreateParameter( + 2, ShapeUtil::MakeShape(PRED, {}), "param2")); + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({param0, param1})); + + auto true_builder = HloComputation::Builder(TestName() + "_TrueBranch"); + { + auto param = true_builder.AddInstruction( + HloInstruction::CreateParameter(0, tshape, "param")); + auto gte0 = true_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, param, 0)); + auto gte1 = true_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, param, 1)); + auto add = true_builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, gte0, gte1)); + true_builder.AddInstruction(HloInstruction::CreateTuple({add})); + } + HloComputation* true_computation = + module->AddEmbeddedComputation(true_builder.Build()); + + auto false_builder = HloComputation::Builder(TestName() + "_FalseBranch"); + { + Shape xshape = ShapeUtil::MakeShapeWithLayout(F32, {128, 8}, {0, 1}); + false_builder.AddInstruction( + HloInstruction::CreateParameter(0, tshape, "param")); + // Using infeed as layout assignment does not mess up with it. + auto infeed = + false_builder.AddInstruction(HloInstruction::CreateInfeed(xshape, "")); + false_builder.AddInstruction(HloInstruction::CreateTuple({infeed})); + } + HloComputation* false_computation = + module->AddEmbeddedComputation(false_builder.Build()); + builder.AddInstruction(HloInstruction::CreateConditional( + result_tshape, pred, tuple, true_computation, tuple, false_computation)); + + HloComputation* computation = module->AddEntryComputation(builder.Build()); + ComputationLayout computation_layout(computation->ComputeProgramShape()); + + AssignLayouts(module.get(), &computation_layout); + + const HloInstruction* true_root = true_computation->root_instruction(); + const HloInstruction* false_root = false_computation->root_instruction(); + EXPECT_THAT(true_root->opcode(), HloOpcode::kTuple); + EXPECT_THAT(false_root->opcode(), HloOpcode::kTuple); + + const HloInstruction* true_result = true_root->operand(0); + const HloInstruction* false_result = false_root->operand(0); + EXPECT_TRUE(LayoutUtil::Equal(true_result->shape().layout(), + false_result->shape().layout())); + EXPECT_THAT(false_result->opcode(), HloOpcode::kCopy); +} + } // namespace } // namespace xla -- GitLab From bdfdfb2f5e6d991b4dabcce0f70dc9e9ea1e0656 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 16:09:00 -0800 Subject: [PATCH 143/629] Allow an option to delete the temporary file created at compilation on exit. Defaulting it to true, though we may want to keep it off during development. PiperOrigin-RevId: 185604628 --- tensorflow/contrib/py2tf/pyct/compiler.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/pyct/compiler.py b/tensorflow/contrib/py2tf/pyct/compiler.py index 0caadf18c0..51cf6930e8 100644 --- a/tensorflow/contrib/py2tf/pyct/compiler.py +++ b/tensorflow/contrib/py2tf/pyct/compiler.py @@ -22,6 +22,7 @@ from __future__ import division from __future__ import print_function # TODO(mdan): Use six for compatibility here. +import atexit import imp import os import tempfile @@ -41,7 +42,8 @@ def ast_to_source(node, indentation): return astor.source_repr.pretty_source(generator.result).lstrip() -def ast_to_object(node, indentation=' ', source_prefix=None): +def ast_to_object( + node, indentation=' ', source_prefix=None, delete_on_exit=True): """Return the Python objects represented by given AST. Compiling the AST code this way ensures that the source code is readable by @@ -51,6 +53,8 @@ def ast_to_object(node, indentation=' ', source_prefix=None): node: The code to compile, as an AST object. indentation: The string to use for indentation. source_prefix: Optional string to print as-is into the source file. + delete_on_exit: Whether to delete the temporary file used for compilation + on exit. Returns: A module object containing the compiled source code. @@ -63,4 +67,6 @@ def ast_to_object(node, indentation=' ', source_prefix=None): f.write(source_prefix) f.write('\n') f.write(source) + if delete_on_exit: + atexit.register(lambda: os.remove(f.name)) return imp.load_source(module_name, f.name), source -- GitLab From 79da4650981d26816f6b135ccf0a89acc5a37d88 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 13 Feb 2018 16:17:55 -0800 Subject: [PATCH 144/629] CleanupFunc doesn't need to do cleanup if _py_funcs is already destroyed. PiperOrigin-RevId: 185606203 --- tensorflow/python/ops/script_ops.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 1b9071ee93..61e14adf4b 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -176,7 +176,10 @@ class CleanupFunc(object): self._token = token def __del__(self): - _py_funcs.remove(self._token) + if _py_funcs is not None: + # If _py_funcs is None, the program is most likely in shutdown, and the + # _py_funcs object has been destroyed already. + _py_funcs.remove(self._token) def _internal_py_func(func, inp, Tout, stateful=None, eager=False, name=None): -- GitLab From 49701caf32f6a9292bddcf34cc5f298b8f7b40d5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 16:43:20 -0800 Subject: [PATCH 145/629] Update TPU Profiler to be able to take a TPU name PiperOrigin-RevId: 185609588 --- .../pip_package/cloud_tpu_profiler/main.py | 68 ++++++++++++++----- 1 file changed, 51 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py index 78d237e6a2..a730d6142d 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/cloud_tpu_profiler/main.py @@ -25,19 +25,34 @@ import sys import tensorflow as tf +# Cloud TPU Cluster Resolvers flags.DEFINE_string( - 'service_addr', None, 'Address of TPU profiler service e.g. ' - 'localhost:8466') + 'gcp_project', None, + 'Project name for the Cloud TPU-enabled project. If not specified, we ' + 'will attempt to automatically detect the GCE project from metadata.') +flags.DEFINE_string( + 'tpu_zone', + None, + help='GCE zone where the Cloud TPU is located in. If not specified, we ' + 'will attempt to automatically detect the GCE project from metadata.') +flags.DEFINE_string('tpu_name', None, + 'Name of the Cloud TPU for Cluster Resolvers. You must ' + 'specify either this flag or --master.') + +# Tool specific parameters flags.DEFINE_string( - 'logdir', None, 'Path of TensorBoard log directory e.g. /tmp/tb_log, ' - 'gs://tb_bucket') + 'service_addr', None, 'Address of TPU profiler service e.g. ' + 'localhost:8466, you must specify either this flag or --tpu_name.') +flags.DEFINE_string('logdir', None, + 'Path of TensorBoard log directory e.g. /tmp/tb_log, ' + 'gs://tb_bucket') flags.DEFINE_integer('duration_ms', 2000, 'Duration of tracing in ms.') -flags.DEFINE_integer( - 'num_tracing_attempts', 3, 'Automatically retry N times when no trace ' - 'event is collected.') -flags.DEFINE_boolean( - 'include_dataset_ops', True, 'Set to false to profile longer TPU ' - 'device traces.') +flags.DEFINE_integer('num_tracing_attempts', 3, + 'Automatically retry N times when no trace ' + 'event is collected.') +flags.DEFINE_boolean('include_dataset_ops', True, + 'Set to false to profile longer TPU ' + 'device traces.') FLAGS = flags.FLAGS EXECUTABLE = 'data/capture_tpu_profile' @@ -48,16 +63,35 @@ def run_main(): def main(unused_argv=None): - if not FLAGS.service_addr or not FLAGS.logdir: - sys.exit('service_addr and logdir must be provided.') + tf.logging.set_verbosity(tf.logging.INFO) + + if FLAGS.service_addr is None and FLAGS.tpu_name is None: + sys.exit('You must specify either --service_addr or --tpu_name.') + + if FLAGS.service_addr is not None: + if FLAGS.tpu_name is not None: + tf.logging.warn('Both --service_addr and --tpu_name are set. Ignoring ' + '--tpu_name and using --service_addr.') + service_addr = FLAGS.service_addr + else: + tpu_cluster_resolver = ( + tf.contrib.cluster_resolver.TPUClusterResolver( + tpu_names=[FLAGS.tpu_name], + zone=FLAGS.tpu_zone, + project=FLAGS.gcp_project)) + service_addr = tpu_cluster_resolver.get_master() + service_addr = service_addr.replace('grpc://', '').replace(':8470', ':8466') + + if not FLAGS.logdir: + sys.exit('logdir must be provided.') executable_path = os.path.join(os.path.dirname(__file__), EXECUTABLE) logdir = os.path.expandvars(os.path.expanduser(FLAGS.logdir)) cmd = [executable_path] - cmd.append('--logdir='+logdir) - cmd.append('--service_addr='+FLAGS.service_addr) - cmd.append('--duration_ms='+str(FLAGS.duration_ms)) - cmd.append('--num_tracing_attempts='+str(FLAGS.num_tracing_attempts)) - cmd.append('--include_dataset_ops='+str(FLAGS.include_dataset_ops).lower()) + cmd.append('--logdir=' + logdir) + cmd.append('--service_addr=' + service_addr) + cmd.append('--duration_ms=' + str(FLAGS.duration_ms)) + cmd.append('--num_tracing_attempts=' + str(FLAGS.num_tracing_attempts)) + cmd.append('--include_dataset_ops=' + str(FLAGS.include_dataset_ops).lower()) subprocess.call(cmd) -- GitLab From f0029e14a17c53e97f9ddb02486efdcc06165091 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 13 Feb 2018 16:43:21 -0800 Subject: [PATCH 146/629] Fix incorrect is_training parameter. And remove many is_training default values to avoid these mistakes from happening again. PiperOrigin-RevId: 185609589 --- .../quantize/python/fold_batch_norms.py | 20 +++--- .../contrib/quantize/python/quantize.py | 38 ++++++----- .../contrib/quantize/python/quantize_graph.py | 2 +- .../quantize/python/quantize_graph_test.py | 64 +++++++++++++++++++ .../python/quantize_parameterized_test.py | 18 +++--- .../contrib/quantize/python/quantize_test.py | 22 ++++++- 6 files changed, 124 insertions(+), 40 deletions(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 36a848d2a8..75d9eb0e58 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -34,7 +34,7 @@ from tensorflow.python.ops import nn_ops from tensorflow.python.util import compat -def FoldBatchNorms(graph, freeze_batch_norm_delay=None, is_training=True): +def FoldBatchNorms(graph, is_training, freeze_batch_norm_delay=None): """Finds batch norm layers and folds them into preceding layers. Folding only affects the following layers: Conv2D, fully connected, depthwise @@ -42,24 +42,22 @@ def FoldBatchNorms(graph, freeze_batch_norm_delay=None, is_training=True): Args: graph: Graph to walk and modify. + is_training: Bool, true if training. freeze_batch_norm_delay: How many steps to wait before freezing moving mean and variance and using them for batch normalization. This value is used only when is_training is True. - is_training: Bool, true if training. Raises: ValueError: When batch norm folding fails. """ _FoldFusedBatchNorms( - graph, - freeze_batch_norm_delay=freeze_batch_norm_delay, - is_training=is_training) + graph, is_training, freeze_batch_norm_delay=freeze_batch_norm_delay) _FoldUnfusedBatchNorms( graph, - freeze_batch_norm_delay=freeze_batch_norm_delay, - is_training=is_training) + is_training=is_training, + freeze_batch_norm_delay=freeze_batch_norm_delay) -def _FoldFusedBatchNorms(graph, freeze_batch_norm_delay, is_training): +def _FoldFusedBatchNorms(graph, is_training, freeze_batch_norm_delay): """Finds fused batch norm layers and folds them into preceding layers. Folding only affects the following layers: Conv2D, fully connected, depthwise @@ -67,9 +65,9 @@ def _FoldFusedBatchNorms(graph, freeze_batch_norm_delay, is_training): Args: graph: Graph to walk and modify. + is_training: Bool, true if training. freeze_batch_norm_delay: How many steps to wait before freezing moving mean and variance and using them for batch normalization. - is_training: Bool, true if training. Raises: ValueError: When batch norm folding fails. @@ -416,7 +414,7 @@ def _FoldFusedBatchNormGrad(op, unused_grad_y, grad_mean, grad_var, unused_1, return (dmean_dx + dvar_dx), None, None, None, None -def _FoldUnfusedBatchNorms(graph, freeze_batch_norm_delay, is_training): +def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay): """Finds unfused batch norm layers and folds them into preceding layers. Folding only affects the following layers: Conv2D, fully connected, depthwise @@ -424,9 +422,9 @@ def _FoldUnfusedBatchNorms(graph, freeze_batch_norm_delay, is_training): Args: graph: Graph to walk and modify. + is_training: Bool, True if training. freeze_batch_norm_delay: How many steps to wait before freezing moving mean and variance and using them for batch normalization. - is_training: Bool, True if training Raises: ValueError: When batch norm folding fails. diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index e44b91f0d0..7a3f92f503 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -40,16 +40,17 @@ _WEIGHT_TYPES = {'Variable', 'VariableV2'} def Quantize(graph, + is_training, weight_bits=8, activation_bits=8, ema_decay=0.999, quant_delay=None, - vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES, - is_training=True): + vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES): """Updates graph with quantization operations. Args: graph: Graph to modify. + is_training: Whether quantizing training graph or eval graph. weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. ema_decay: (Optional) Float, EMA decay parameter. EMA is used to update @@ -60,7 +61,6 @@ def Quantize(graph, training. vars_collection: (Optional) Collection where to store the variables for quantization interval ends. - is_training: (Optional) Whether quantizing training graph or eval graph. Raises: ValueError: When quantization fails. """ @@ -70,15 +70,15 @@ def Quantize(graph, context = _GetContextFromOp(layer_match.layer_op) _InsertQuantOp( context, + 'weights_quant', layer_match.weight_tensor.op, [layer_match.layer_op], - name='weights_quant', + is_training, moving_avg=False, - bits=weight_bits, ema_decay=ema_decay, quant_delay=quant_delay, - is_training=is_training, narrow_range=True, - vars_collection=vars_collection) + vars_collection=vars_collection, + bits=weight_bits) # Quantize the activations. consumer_ops = input_to_ops_map.ConsumerOperations( @@ -88,23 +88,25 @@ def Quantize(graph, add_context = re.search(r'^(.*)/([^/]+)', context).group(1) _InsertQuantOp( add_context, + 'act_quant', layer_match.activation_op, consumer_ops, - name='act_quant', + is_training, moving_avg=True, - init_min=0.0, ema_decay=ema_decay, quant_delay=quant_delay, + vars_collection=vars_collection, bits=activation_bits, - vars_collection=vars_collection) + init_min=0.0) # Quantize the inputs and output to the bypass (if it exists). The input to # the bypass is the bias add, and the output is the activation. if layer_match.bypass_op is not None: _InsertQuantOp( context, + 'conv_quant', layer_match.bias_add_op, [layer_match.bypass_op], - name='conv_quant', + is_training, moving_avg=True, ema_decay=ema_decay, quant_delay=quant_delay, @@ -112,10 +114,14 @@ def Quantize(graph, bits=activation_bits) _InsertQuantOp( add_context, + 'add_quant', layer_match.bypass_op, input_to_ops_map.ConsumerOperations(layer_match.bypass_op), - name='add_quant', + is_training, moving_avg=True, + ema_decay=ema_decay, + quant_delay=quant_delay, + vars_collection=vars_collection, bits=activation_bits) @@ -235,9 +241,10 @@ class _LayerMatch(object): def _InsertQuantOp(context, + name, producer, consumers, - name, + is_training, moving_avg=True, init_min=-6.0, init_max=6.0, @@ -245,16 +252,16 @@ def _InsertQuantOp(context, ema_decay=0.999, quant_delay=None, vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES, - is_training=True, narrow_range=False): """Inserts a quant op between a producer op and (multiple) consumer ops. Args: context: Context w,here producer and consumer operations are nested. + name: Name for the new quantization op within the context. producer: Producer operation of the pairs where quantization will be inserted. consumers: Consumer operations of the pairs. - name: Name for the new quantization op within the context. + is_training: Whether quantizing training graph or eval graph. moving_avg: Specifies whether to use exponential moving average or just the last value seen. init_min: Starting minimum value for the new quantization op. @@ -268,7 +275,6 @@ def _InsertQuantOp(context, training. vars_collection: (Optional) Collection where to store the variables for quantization interval ends. - is_training: (Optional) Whether quantizing training graph or eval graph. narrow_range: Whether to use the narrow quantization range [1; 2^bits - 1] or wide range [0; 2^bits - 1]. Raises: diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index b91e045175..0dfe78fd02 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -63,7 +63,7 @@ def _create_graph(input_graph=None, is_training=is_training) quantize.Quantize( input_graph, - is_training=is_training, + is_training, quant_delay=quant_delay, weight_bits=weight_bits, activation_bits=activation_bits) diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py index 5c65a160a0..6b9289ef5f 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph_test.py +++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py @@ -50,6 +50,14 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): for fn in rewrite_fns: test_fn(fn) + def _RunTestOverEvalRewrites(self, test_fn): + rewrite_fns = [ + quantize_graph.create_eval_graph, + quantize_graph.experimental_create_eval_graph, + ] + for fn in rewrite_fns: + test_fn(fn) + def _RunTestOverExperimentalRewrites(self, test_fn): rewrite_fns = [ quantize_graph.experimental_create_training_graph, @@ -147,6 +155,62 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): self.assertEqual(op.get_attr('num_bits'), activation_bits) self.assertTrue(act_quant_found) + def testTrainingQuantization(self): + self._RunTestOverTrainingRewrites(self._TestTrainingQuantization) + + def _TestTrainingQuantization(self, rewrite_fn): + with ops.Graph().as_default() as g: + self._ConvLayer() + rewrite_fn() + + # Ensure that FakeQuant and variable update nodes were found. + quant_found = False + assign_min_last_found = False + assign_min_ema_found = False + assign_max_last_found = False + assign_max_ema_found = False + for op in g.get_operations(): + # Check that FakeQuant operations were added. + if op.type == 'FakeQuantWithMinMaxVars': + quant_found = True + # Check that update operations for the added min max variables exist in + # the graph. + if 'AssignMinLast' in op.name: + assign_min_last_found = True + elif 'AssignMinEma' in op.name: + assign_min_ema_found = True + elif 'AssignMaxLast' in op.name: + assign_max_last_found = True + elif 'AssignMaxEma' in op.name: + assign_max_ema_found = True + self.assertTrue(assign_min_last_found) + self.assertTrue(assign_min_ema_found) + self.assertTrue(assign_max_last_found) + self.assertTrue(assign_max_ema_found) + self.assertTrue(quant_found) + + def testEvalQuantization(self): + self._RunTestOverEvalRewrites(self._TestEvalQuantization) + + def _TestEvalQuantization(self, rewrite_fn): + with ops.Graph().as_default() as g: + self._ConvLayer() + rewrite_fn() + + # Ensure that FakeQuant and variable update nodes were found. + quant_found = False + for op in g.get_operations(): + # Check that FakeQuant operations were added. + if op.type == 'FakeQuantWithMinMaxVars': + quant_found = True + # Check that update operations for the added min max variables don't + # exist in the graph. + update_names = [ + 'AssignMinLast', 'AssignMinEma', 'AssignMaxLast', 'AssignMaxEma' + ] + self.assertFalse(any(s in op.name for s in update_names)) + self.assertTrue(quant_found) + def _ConvLayer(self): """Add a basic convolution layer to the default graph.""" batch_size, height, width, depth = 5, 128, 128, 3 diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index 2e74f3b04d..639a7454a9 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -88,7 +88,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph, quant_delay=delay) + quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + quantization_node_name) @@ -164,7 +164,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph, quant_delay=delay) + quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + @@ -240,7 +240,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph, quant_delay=delay) + quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + @@ -363,9 +363,9 @@ class QuantizeTest(test_util.TensorFlowTestCase): with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - fold_batch_norms.FoldBatchNorms(graph) + fold_batch_norms.FoldBatchNorms(graph, is_training=True) - quantize.Quantize(graph, quant_delay=delay) + quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + @@ -446,9 +446,9 @@ class QuantizeTest(test_util.TensorFlowTestCase): with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - fold_batch_norms.FoldBatchNorms(graph) + fold_batch_norms.FoldBatchNorms(graph, is_training=True) - quantize.Quantize(graph, quant_delay=delay) + quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + @@ -534,9 +534,9 @@ class QuantizeTest(test_util.TensorFlowTestCase): with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - fold_batch_norms.FoldBatchNorms(graph) + fold_batch_norms.FoldBatchNorms(graph, is_training=True) - quantize.Quantize(graph, quant_delay=delay) + quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + quantization_node_name) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index 53cbd66741..bb7be08094 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -35,7 +35,15 @@ separable_conv2d = layers.separable_conv2d class QuantizeTest(test_util.TensorFlowTestCase): + def _RunTestOverParameters(self, test_fn): + params = [True, False] + for is_training in params: + test_fn(is_training) + def testInsertQuantOpFailsWhenOpsNotConnected(self): + pass + + def _TestInsertQuantOpFailsWhenOpsNotConnected(self, is_training): graph = ops.Graph() with graph.as_default(): batch_size, height, width, depth = 5, 128, 128, 3 @@ -48,11 +56,15 @@ class QuantizeTest(test_util.TensorFlowTestCase): # Inserting a quantization op between two unconnected ops should fail with # ValueError. with self.assertRaises(ValueError) as err: - quantize._InsertQuantOp('test', conv.op, [relu.op], 'FailingQuantOp') + quantize._InsertQuantOp('test', is_training, conv.op, [relu.op], + 'FailingQuantOp') self.assertEqual( str(err.exception), 'Some inputs not quantized for ops: [Relu6]') def testInsertQuantOpForAddAfterConv2d(self): + self._RunTestOverParameters(self._TestInsertQuantOpForAddAfterConv2d) + + def _TestInsertQuantOpForAddAfterConv2d(self, is_training): graph = ops.Graph() with graph.as_default(): batch_size, height, width, depth = 5, 128, 128, 3 @@ -67,7 +79,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph=graph, weight_bits=8, activation_bits=8) + quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) quantization_node_name = 'FakeQuantWithMinMaxVars' add_quant = graph.get_operation_by_name('test/add_quant/' + @@ -75,6 +87,10 @@ class QuantizeTest(test_util.TensorFlowTestCase): self.assertEqual(add_quant.type, quantization_node_name) def testInsertQuantOpForAddAfterSeparableConv2d(self): + self._RunTestOverParameters( + self._TestInsertQuantOpForAddAfterSeparableConv2d) + + def _TestInsertQuantOpForAddAfterSeparableConv2d(self, is_training): graph = ops.Graph() with graph.as_default(): batch_size, height, width, depth = 5, 128, 128, 3 @@ -90,7 +106,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph=graph, weight_bits=8, activation_bits=8) + quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8) quantization_node_name = 'FakeQuantWithMinMaxVars' add_quant = graph.get_operation_by_name('test/add_quant/' + -- GitLab From b6c2273f0a7735b9850080eefd26b6056b4d5f42 Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Wed, 7 Feb 2018 17:19:11 -0500 Subject: [PATCH 147/629] Bump JetPack default to 3.2 in Android build script (#16842) --- tensorflow/contrib/makefile/build_all_android.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/makefile/build_all_android.sh b/tensorflow/contrib/makefile/build_all_android.sh index f67c516186..fc88f59e09 100755 --- a/tensorflow/contrib/makefile/build_all_android.sh +++ b/tensorflow/contrib/makefile/build_all_android.sh @@ -52,7 +52,7 @@ shift $((OPTIND - 1)) if [ "$ARCH" == "tegra" ]; then if [[ -z "${JETPACK}" ]]; then - export JETPACK="$HOME/JetPack_Android_3.0" + export JETPACK="$HOME/JetPack_Android_3.2" fi if [ ! -d ${JETPACK} ]; then echo "Can't find Jetpack at ${JETPACK}" -- GitLab From 22ff574ee9d4272995ddcc6aaac70479b7e6e17c Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Tue, 13 Feb 2018 18:20:07 -0500 Subject: [PATCH 148/629] Add instructions for building CUDA-enabled Android TensorFlow (#16961) * Add instructions for building CUDA-enabled Android TensorFlow * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md --- tensorflow/contrib/android/README.md | 5 ++ tensorflow/contrib/makefile/README.md | 99 +++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/tensorflow/contrib/android/README.md b/tensorflow/contrib/android/README.md index b8d73bf24c..db37bcf73d 100644 --- a/tensorflow/contrib/android/README.md +++ b/tensorflow/contrib/android/README.md @@ -81,6 +81,11 @@ For documentation on building a self-contained AAR file with cmake, see [tensorflow/contrib/android/cmake](cmake). +### Makefile + +For documentation on building native TF libraries with make, including a CUDA-enabled variant for devices like the Nvidia Shield TV, see [tensorflow/contrib/makefile/README.md](../makefile/README.md) + + ## AssetManagerFileSystem This directory also contains a TensorFlow filesystem supporting the Android diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 0613de2cab..9758ee1c47 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -130,6 +130,105 @@ adb shell '/data/local/tmp/benchmark \ For more details, see the [benchmark documentation](../../tools/benchmark). +## CUDA support for Tegra devices running Android (Nvidia Shield TV, etc) + +With the release of TF 1.6 and JetPack for Android 3.2 (currently pending), you can now build a version of TensorFlow for compatible devices according to the following instructions which will receive the full benefits of GPU acceleration. + +#### Environment setup: + +First, download and install JetPack for Android version 3.2 or greater from [Nvidia](https://developers.nvidia.com). Note that as of the TF 1.6 release the JetPack for Android 3.2 release is still pending, and regular JetPack for L4T will not work. + +```bash +git clone https://github.com/tensorflow/tensorflow.git +cd tensorflow +JETPACK=$HOME/JetPack_Android_3.2 +TEGRA_LIBS="$JETPACK/cuDNN/aarch64/cuda/lib64/libcudnn.so $JETPACK/cuda-9.0/extras/CUPTI/lib64/libcupti.so $JETPACK/cuda/targets/aarch64-linux-androideabi/lib64/libcufft.so" +``` + +#### Building all CUDA-enabled native binaries: +This will build CUDA-enabled versions of libtensorflow_inference.so and the benchmark binary. (libtensorflow_demo.so will also be built incidentally, but it does not support CUDA) + +```bash +NDK_ROOT=$JETPACK/android-ndk-r13b +CC_PREFIX=ccache tensorflow/contrib/makefile/build_all_android.sh -s tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in -t "libtensorflow_inference.so libtensorflow_demo.so all" -a tegra +``` +(add -T on subsequent builds to skip protobuf downloading/building) + + +#### Testing the the CUDA-enabled benchmark via adb: +Build binaries first as above, then run: + +```bash +adb shell mkdir -p /data/local/tmp/lib64 +adb push $TEGRA_LIBS /data/local/tmp/lib64 +adb push tensorflow/contrib/makefile/gen/bin/android_arm64-v8a/benchmark /data/local/tmp +wget https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk +unzip tensorflow_demo.apk -d /tmp/tensorflow_demo +adb push /tmp/tensorflow_demo/assets/*.pb /data/local/tmp +adb shell "LD_LIBRARY_PATH=/data/local/tmp/lib64 /data/local/tmp/benchmark --graph=/data/local/tmp/tensorflow_inception_graph.pb" +``` + +#### Building the CUDA-enabled TensorFlow AAR with Bazel: +Build the native binaries first as above. Then, build the aar and package the native libs by executing the following: +```bash +mkdir -p /tmp/tf/jni/arm64-v8a +cp tensorflow/contrib/makefile/gen/lib/android_tegra/libtensorflow_*.so /tmp/tf/jni/arm64-v8a/ +cp $TEGRA_LIBS /tmp/tf/jni/arm64-v8a +bazel build //tensorflow/contrib/android:android_tensorflow_inference_java.aar +cp bazel-bin/tensorflow/contrib/android/android_tensorflow_inference_java.aar /tmp/tf/tensorflow.aar +cd /tmp/tf +chmod +w tensorflow.aar +zip -ur tensorflow.aar $(find jni -name *.so) +``` + +#### Building the CUDA-enabled TensorFlow Android demo with Bazel: +Build binaries first as above, then edit tensorflow/examples/android/BUILD and replace: +``` + srcs = [ + ":libtensorflow_demo.so", + "//tensorflow/contrib/android:libtensorflow_inference.so", + ], +``` +with: +``` +srcs = glob(["libs/arm64-v8a/*.so"]), +``` + +Then run: +```bash +# Create dir for native libs +mkdir -p tensorflow/examples/android/libs/arm64-v8a + +# Copy JetPack libs +cp $TEGRA_LIBS tensorflow/examples/android/libs/arm64-v8a + +# Copy native TensorFlow libraries +cp tensorflow/contrib/makefile/gen/lib/android_arm64-v8a/libtensorflow_*.so tensorflow/examples/android/libs/arm64-v8a/ + +# Build APK +bazel build -c opt --fat_apk_cpu=arm64-v8a tensorflow/android:tensorflow_demo + +# Install +adb install -r -f bazel-bin/tensorflow/examples/android/tensorflow_demo.apk +``` + +#### Building the CUDA-enabled Android demo with gradle/Android Studio: + +Add tensorflow/examples/android as an Android project in Android Studio as normal. + +Edit build.gradle and: +* set nativeBuildSystem = 'makefile' +* set cpuType = 'arm64-v8a' +* in "buildNativeMake", replace cpuType with 'tegra' (optional speedups like -T and ccache also work) +* set the environment "NDK_ROOT" var to $JETPACK/android-ndk-r13b + +Click "build apk" to build. + +Install: +```bash +adb install -r -f tensorflow/examples/android/gradleBuild/outputs/apk/debug/android-debug.apk +``` + ## iOS _Note: To use this library in an iOS application, see related instructions in -- GitLab From ecec1d8c8e557656e7e5c4034604ca6f20d2d3c2 Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Tue, 13 Feb 2018 18:49:44 -0500 Subject: [PATCH 149/629] Update RELEASE.md --- RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.md b/RELEASE.md index de4a34bb04..1a037ce595 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -8,6 +8,7 @@ * New Optimizer internal API for non-slot variables. Descendants of AdamOptimizer that access _beta[12]_power will need to be updated. * `tf.estimator.{FinalExporter,LatestExporter}` now export stripped SavedModels. This improves forward compatibility of the SavedModel. * FFT support added to XLA CPU/GPU. +* Android TF can now be built with CUDA acceleration on compatible Tegra devices (see [contrib/makefile/README.md](contrib/makefile/README.md) for more information) ## Bug Fixes and Other Changes * Documentation updates: -- GitLab From cf04f92c340b6fb0207eb780959a12fa03356f77 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 13 Feb 2018 16:55:54 -0800 Subject: [PATCH 150/629] Improve type safety around float constants Instead of passing floating point constants to the vector support library as compiler-side floats, pass them as APFloats instead. This reduces the duration during which these constants are semantically represented as floats on the host side and are subject to fast-math-like behavior. This is especially important in cases where the exact bit representation of the floating point constant is significant, but also makes progress towards ensuring that e.g. build XLA with -ffast-math does not change the IR we generate. PiperOrigin-RevId: 185611301 --- .../xla/service/cpu/llvm_ir_runtime.cc | 86 ++++++++++--------- .../xla/service/cpu/vector_support_library.cc | 7 +- .../xla/service/cpu/vector_support_library.h | 51 +++++++---- 3 files changed, 84 insertions(+), 60 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc index ee213e05d1..2e5cc96098 100644 --- a/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/llvm_ir_runtime.cc @@ -63,7 +63,8 @@ llvm::Function* EmitVectorF32TanhIfNeeded(llvm::Module* module, CHECK_EQ(input->getType(), vsl.vector_type()); // This implements the same rational interpolant as implemented in Eigen3. - llvm::Value* input_clamped = vsl.Clamp(input, /*low=*/-9.0, /*high=*/9.0); + llvm::Value* input_clamped = + vsl.Clamp(input, /*low=*/GetIeeeF32(-9.0), /*high=*/GetIeeeF32(9.0)); std::array numerator_coeffs{ -2.76076847742355e-16f, 2.00018790482477e-13f, -8.60467152213735e-11f, @@ -75,16 +76,18 @@ llvm::Function* EmitVectorF32TanhIfNeeded(llvm::Module* module, 4.89352518554385e-03f}; llvm::Value* input_squared = vsl.Mul(input_clamped, input_clamped); - llvm::Value* numerator = vsl.SplatFloat(numerator_coeffs[0]); + llvm::Value* numerator = vsl.SplatFloat(GetIeeeF32(numerator_coeffs[0])); for (int i = 1; i < numerator_coeffs.size(); i++) { - numerator = vsl.MulAdd(input_squared, numerator, numerator_coeffs[i]); + numerator = + vsl.MulAdd(input_squared, numerator, GetIeeeF32(numerator_coeffs[i])); } numerator = vsl.Mul(input_clamped, numerator); - llvm::Value* denominator = vsl.SplatFloat(denominator_coeffs[0]); + llvm::Value* denominator = vsl.SplatFloat(GetIeeeF32(denominator_coeffs[0])); for (int i = 1; i < denominator_coeffs.size(); i++) { - denominator = vsl.MulAdd(input_squared, denominator, denominator_coeffs[i]); + denominator = vsl.MulAdd(input_squared, denominator, + GetIeeeF32(denominator_coeffs[i])); } llvm::Value* result = vsl.Div(numerator, denominator); @@ -119,24 +122,27 @@ llvm::Function* EmitVectorF32ExpIfNeeded(llvm::Module* module, // This implements the same polynomial approximation as implemented in Eigen3. - const float exp_hi = 88.3762626647950; - const float exp_lo = -88.3762626647949; + const llvm::APFloat half = GetIeeeF32(0.5); + const llvm::APFloat one = GetIeeeF32(1.0); + + const llvm::APFloat exp_hi = GetIeeeF32(88.3762626647950); + const llvm::APFloat exp_lo = GetIeeeF32(-88.3762626647949); - const float cephes_LOG2EF = 1.44269504088896341; - const float cephes_exp_C1 = 0.693359375; - const float cephes_exp_C2 = -2.12194440e-4; + const llvm::APFloat cephes_LOG2EF = GetIeeeF32(1.44269504088896341); + const llvm::APFloat cephes_exp_C1 = GetIeeeF32(0.693359375); + const llvm::APFloat cephes_exp_C2 = GetIeeeF32(-2.12194440e-4); - const float cephes_exp_p0 = 1.9875691500E-4; - const float cephes_exp_p1 = 1.3981999507E-3; - const float cephes_exp_p2 = 8.3334519073E-3; - const float cephes_exp_p3 = 4.1665795894E-2; - const float cephes_exp_p4 = 1.6666665459E-1; - const float cephes_exp_p5 = 5.0000001201E-1; + const llvm::APFloat cephes_exp_p0 = GetIeeeF32(1.9875691500E-4); + const llvm::APFloat cephes_exp_p1 = GetIeeeF32(1.3981999507E-3); + const llvm::APFloat cephes_exp_p2 = GetIeeeF32(8.3334519073E-3); + const llvm::APFloat cephes_exp_p3 = GetIeeeF32(4.1665795894E-2); + const llvm::APFloat cephes_exp_p4 = GetIeeeF32(1.6666665459E-1); + const llvm::APFloat cephes_exp_p5 = GetIeeeF32(5.0000001201E-1); llvm::Value* input = &*vector_exp_function->arg_begin(); llvm::Value* input_clamped = vsl.Clamp(input, /*low=*/exp_lo, /*high=*/exp_hi); - llvm::Value* fx = vsl.Floor(vsl.MulAdd(input_clamped, cephes_LOG2EF, 0.5)); + llvm::Value* fx = vsl.Floor(vsl.MulAdd(input_clamped, cephes_LOG2EF, half)); llvm::Value* tmp = vsl.Mul(cephes_exp_C1, fx); llvm::Value* z = vsl.Mul(cephes_exp_C2, fx); llvm::Value* x = vsl.Sub(input_clamped, tmp); @@ -149,7 +155,7 @@ llvm::Function* EmitVectorF32ExpIfNeeded(llvm::Module* module, y = vsl.MulAdd(y, x, cephes_exp_p4); y = vsl.MulAdd(y, x, cephes_exp_p5); y = vsl.MulAdd(y, z, x); - y = vsl.Add(1.0f, y); + y = vsl.Add(one, y); // VectorSupportLibrary (intentionally) can't juggle more than one type at a // time so drop down to IRBuilder for this bit. @@ -198,32 +204,28 @@ llvm::Function* EmitVectorF32LogIfNeeded(llvm::Module* module, llvm::Value* input = &*vector_log_function->arg_begin(); VectorSupportLibrary vsl(F32, vector_width, &ir_builder, "log_f32"); - const float half = 0.5; + const llvm::APFloat half = GetIeeeF32(0.5); + const llvm::APFloat one = GetIeeeF32(1.0); // This implements the same polynomial approximation as implemented in Eigen3. // Returns NaN for x < 0, -INF for x = 0 - const float cephes_SQRTHF = 0.707106781186547524; - const float cephes_log_p0 = 7.0376836292E-2; - const float cephes_log_p1 = -1.1514610310E-1; - const float cephes_log_p2 = 1.1676998740E-1; - const float cephes_log_p3 = -1.2420140846E-1; - const float cephes_log_p4 = +1.4249322787E-1; - const float cephes_log_p5 = -1.6668057665E-1; - const float cephes_log_p6 = +2.0000714765E-1; - const float cephes_log_p7 = -2.4999993993E-1; - const float cephes_log_p8 = +3.3333331174E-1; - const float cephes_log_q1 = -2.12194440e-4; - const float cephes_log_q2 = 0.693359375; + const llvm::APFloat cephes_SQRTHF = GetIeeeF32(0.707106781186547524); + const llvm::APFloat cephes_log_p0 = GetIeeeF32(7.0376836292E-2); + const llvm::APFloat cephes_log_p1 = GetIeeeF32(-1.1514610310E-1); + const llvm::APFloat cephes_log_p2 = GetIeeeF32(1.1676998740E-1); + const llvm::APFloat cephes_log_p3 = GetIeeeF32(-1.2420140846E-1); + const llvm::APFloat cephes_log_p4 = GetIeeeF32(+1.4249322787E-1); + const llvm::APFloat cephes_log_p5 = GetIeeeF32(-1.6668057665E-1); + const llvm::APFloat cephes_log_p6 = GetIeeeF32(+2.0000714765E-1); + const llvm::APFloat cephes_log_p7 = GetIeeeF32(-2.4999993993E-1); + const llvm::APFloat cephes_log_p8 = GetIeeeF32(+3.3333331174E-1); + const llvm::APFloat cephes_log_q1 = GetIeeeF32(-2.12194440e-4); + const llvm::APFloat cephes_log_q2 = GetIeeeF32(0.693359375); // The smallest non denormalized float number. - const float min_norm_pos = tensorflow::bit_cast(0x00800000); - const float minus_inf = tensorflow::bit_cast(0xff800000); - - // NB! This number is denormal and since TF sets the denormals-are-zero flag - // (and if TF didn't, -ffast-math would) trying to operate on this float using - // C++ operations (including, for instance, implicit conversion to double) - // will coerce this to zero. - const float inv_mant_mask = tensorflow::bit_cast(~0x7f800000); + const llvm::APFloat min_norm_pos = GetIeeeF32FromBitwiseRep(0x00800000); + const llvm::APFloat minus_inf = GetIeeeF32FromBitwiseRep(0xff800000); + const llvm::APFloat inv_mant_mask = GetIeeeF32FromBitwiseRep(~0x7f800000); // invalid_mask is set if x is negative or NaN (and therefore output // must be NaN). @@ -251,7 +253,7 @@ llvm::Function* EmitVectorF32LogIfNeeded(llvm::Module* module, emm0 = ir_builder.CreateSub(emm0, vector_constant_0x7f); llvm::Value* e = - vsl.Add(1.0f, ir_builder.CreateSIToFP(emm0, vsl.vector_type())); + vsl.Add(one, ir_builder.CreateSIToFP(emm0, vsl.vector_type())); // part2: // if( x < SQRTHF ) { @@ -260,8 +262,8 @@ llvm::Function* EmitVectorF32LogIfNeeded(llvm::Module* module, // } else { x = x - 1.0; } llvm::Value* mask = vsl.FCmpOLTMask(input, cephes_SQRTHF); llvm::Value* tmp = vsl.FloatAnd(input, mask); - input = vsl.Sub(input, 1.0); - e = vsl.Sub(e, vsl.FloatAnd(mask, 1.0)); + input = vsl.Sub(input, one); + e = vsl.Sub(e, vsl.FloatAnd(mask, one)); input = vsl.Add(input, tmp); llvm::Value* x2 = vsl.Mul(input, input); diff --git a/tensorflow/compiler/xla/service/cpu/vector_support_library.cc b/tensorflow/compiler/xla/service/cpu/vector_support_library.cc index 0596e80df4..150db1cb6e 100644 --- a/tensorflow/compiler/xla/service/cpu/vector_support_library.cc +++ b/tensorflow/compiler/xla/service/cpu/vector_support_library.cc @@ -103,11 +103,12 @@ llvm::Value* VectorSupportLibrary::Div(llvm::Value* lhs, llvm::Value* rhs) { } } -llvm::Value* VectorSupportLibrary::Clamp(llvm::Value* a, float low, - float high) { +llvm::Value* VectorSupportLibrary::Clamp(llvm::Value* a, + const llvm::APFloat& low, + const llvm::APFloat& high) { AssertCorrectTypes({a}); llvm::Type* type = a->getType(); - CHECK_LT(low, high); + CHECK(low.compare(high) == llvm::APFloat::cmpLessThan); CHECK(scalar_type_->isFloatingPointTy()); return llvm_ir::EmitFloatMin( llvm_ir::EmitFloatMax(a, GetConstantFloat(type, low), ir_builder_), diff --git a/tensorflow/compiler/xla/service/cpu/vector_support_library.h b/tensorflow/compiler/xla/service/cpu/vector_support_library.h index 010c82f0cf..6479bf76aa 100644 --- a/tensorflow/compiler/xla/service/cpu/vector_support_library.h +++ b/tensorflow/compiler/xla/service/cpu/vector_support_library.h @@ -26,6 +26,16 @@ limitations under the License. namespace xla { namespace cpu { + +// Simple wrappers around llvm::APFloat::APFloat to make the calling code more +// obvious. + +inline llvm::APFloat GetIeeeF32(float f) { return llvm::APFloat(f); } +inline llvm::APFloat GetIeeeF32FromBitwiseRep(int32 bitwise_value) { + return llvm::APFloat(llvm::APFloat::IEEEsingle(), + llvm::APInt(/*numBits=*/32, /*val=*/bitwise_value)); +} + // A thin wrapper around llvm_util.h to make code generating vector math flow // more readable. class VectorSupportLibrary { @@ -41,24 +51,34 @@ class VectorSupportLibrary { llvm::Value* Mul(int64 lhs, llvm::Value* rhs) { return Mul(ir_builder()->getInt64(lhs), rhs); } - llvm::Value* Mul(float lhs, llvm::Value* rhs) { + llvm::Value* Mul(const llvm::APFloat& lhs, llvm::Value* rhs) { return Mul(GetConstantFloat(rhs->getType(), lhs), rhs); } + // If your call resolved to these then you probably wanted the versions taking + // APFloat. + llvm::Value* Mul(double lhs, llvm::Value* rhs) = delete; + llvm::Value* Mul(float lhs, llvm::Value* rhs) = delete; + llvm::Value* Add(llvm::Value* lhs, llvm::Value* rhs); llvm::Value* Add(int64 lhs, llvm::Value* rhs) { return Add(ir_builder()->getInt64(lhs), rhs); } - llvm::Value* Add(float lhs, llvm::Value* rhs) { + llvm::Value* Add(const llvm::APFloat& lhs, llvm::Value* rhs) { return Add(GetConstantFloat(rhs->getType(), lhs), rhs); } + // If your call resolved to these then you probably wanted the versions taking + // APFloat. + llvm::Value* Add(double lhs, llvm::Value* rhs) = delete; + llvm::Value* Add(float lhs, llvm::Value* rhs) = delete; + llvm::Value* Sub(llvm::Value* lhs, llvm::Value* rhs); - llvm::Value* Sub(llvm::Value* lhs, float rhs) { + llvm::Value* Sub(llvm::Value* lhs, const llvm::APFloat& rhs) { return Sub(lhs, GetConstantFloat(lhs->getType(), rhs)); } llvm::Value* Max(llvm::Value* lhs, llvm::Value* rhs); - llvm::Value* Max(float lhs, llvm::Value* rhs) { + llvm::Value* Max(const llvm::APFloat& lhs, llvm::Value* rhs) { return Max(GetConstantFloat(rhs->getType(), lhs), rhs); } llvm::Value* Div(llvm::Value* lhs, llvm::Value* rhs); @@ -67,19 +87,21 @@ class VectorSupportLibrary { return Add(c, Mul(a, b)); } - llvm::Value* MulAdd(llvm::Value* a, llvm::Value* b, float c) { + llvm::Value* MulAdd(llvm::Value* a, llvm::Value* b, const llvm::APFloat& c) { return Add(GetConstantFloat(vector_type(), c), Mul(a, b)); } - llvm::Value* MulAdd(llvm::Value* a, float b, float c) { + llvm::Value* MulAdd(llvm::Value* a, const llvm::APFloat& b, + const llvm::APFloat& c) { return Add(GetConstantFloat(a->getType(), c), Mul(a, GetConstantFloat(a->getType(), b))); } llvm::Value* Floor(llvm::Value* a); - llvm::Value* Clamp(llvm::Value* a, float low, float high); - llvm::Value* SplatFloat(float d) { + llvm::Value* Clamp(llvm::Value* a, const llvm::APFloat& low, + const llvm::APFloat& high); + llvm::Value* SplatFloat(const llvm::APFloat& d) { return GetConstantFloat(vector_type(), d); } @@ -93,7 +115,7 @@ class VectorSupportLibrary { llvm::Value* FCmpEQMask(llvm::Value* lhs, llvm::Value* rhs); llvm::Value* FCmpULEMask(llvm::Value* lhs, llvm::Value* rhs); llvm::Value* FCmpOLTMask(llvm::Value* lhs, llvm::Value* rhs); - llvm::Value* FCmpOLTMask(llvm::Value* lhs, float rhs) { + llvm::Value* FCmpOLTMask(llvm::Value* lhs, const llvm::APFloat& rhs) { return FCmpOLTMask(lhs, GetConstantFloat(lhs->getType(), rhs)); } @@ -102,11 +124,11 @@ class VectorSupportLibrary { // generating predicates above this type system oddity makes the kernel IR // generation code less cluttered. llvm::Value* FloatAnd(llvm::Value* lhs, llvm::Value* rhs); - llvm::Value* FloatAnd(llvm::Value* lhs, float rhs) { + llvm::Value* FloatAnd(llvm::Value* lhs, const llvm::APFloat& rhs) { return FloatAnd(lhs, GetConstantFloat(lhs->getType(), rhs)); } llvm::Value* FloatOr(llvm::Value* lhs, llvm::Value* rhs); - llvm::Value* FloatOr(llvm::Value* lhs, float rhs) { + llvm::Value* FloatOr(llvm::Value* lhs, const llvm::APFloat& rhs) { return FloatOr(lhs, GetConstantFloat(lhs->getType(), rhs)); } llvm::Value* FloatNot(llvm::Value* lhs); @@ -115,7 +137,7 @@ class VectorSupportLibrary { } llvm::Value* BroadcastScalar(llvm::Value* x); - llvm::Value* BroadcastScalar(float d) { + llvm::Value* BroadcastScalar(const llvm::APFloat& d) { return BroadcastScalar(GetConstantFloat(scalar_type(), d)); } @@ -238,9 +260,8 @@ class VectorSupportLibrary { llvm::Type* IntegerTypeForFloatSize(bool vector); llvm::Value* I1ToFloat(llvm::Value* i1); - llvm::Value* GetConstantFloat(llvm::Type* type, float f) { - llvm::Constant* scalar_value = - llvm::ConstantFP::get(type->getContext(), llvm::APFloat(f)); + llvm::Value* GetConstantFloat(llvm::Type* type, const llvm::APFloat& f) { + llvm::Constant* scalar_value = llvm::ConstantFP::get(type->getContext(), f); if (llvm::isa(type)) { return llvm::ConstantVector::getSplat(vector_size(), scalar_value); } -- GitLab From 7575f334ee0879825ceed23928f5e99d0f71b5f8 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Tue, 13 Feb 2018 17:16:29 -0800 Subject: [PATCH 151/629] [XLA:GPU] Don't crash when the root instruction of a computation is a multi-output fusion node, and avoid some pointer chasing with tuples. Previously, the kernels we generated would have one argument per *top-level* buffer of the input/output. This was fine for inputs. But it doesn't work for outputs: Imagine you're a node that returns a tuple -- e.g. multi-output fusion -- if all you get is a pointer to the top-level buffer of your output (which should contain pointers to the lower-level buffers at some point, but at the moment is just empty), how are you supposed to figure out where to write your output? (This usually worked because most of the time your output would live inside of the big XLA temp buffer, and kernels always get a pointer to that.) Now we pass all the buffers, top-level and otherwise, to our kernel. In addition, we're now willing to dereference statically tuples that live entirely in XLA's temp buffer. Pointers in input tuples must still be dereferenced dynamically, because the caller has the option of giving us these values or not when invoking XLA. This change makes some parts of BufferAssignment/BufferAllocations more truthful. Previously, if you passed a tuple-shaped input to XLA, we'd say in BufferAllocations that the pointer for some subshape of the param was the *top-level tuple pointer*. XLA then knew that this was a lie and would dereference it accordingly. Now we have an explicit notion of a BufferAllocation pointing to a subshape of an input parameter. PiperOrigin-RevId: 185614060 --- .../compiler/xla/service/buffer_assignment.cc | 43 ++- .../compiler/xla/service/buffer_assignment.h | 15 +- .../xla/service/gpu/buffer_allocations.cc | 8 + .../xla/service/gpu/gpu_executable.cc | 13 +- .../xla/service/gpu/hlo_to_ir_bindings.h | 5 +- .../xla/service/gpu/ir_emitter_unnested.cc | 337 +++++++++++++----- .../xla/service/gpu/ir_emitter_unnested.h | 8 +- .../compiler/xla/service/gpu/kernel_thunk.cc | 17 +- .../compiler/xla/service/gpu/kernel_thunk.h | 6 +- tensorflow/compiler/xla/service/hlo.proto | 1 + tensorflow/compiler/xla/shape_util.h | 3 + tensorflow/compiler/xla/tests/BUILD | 2 + .../xla/tests/multioutput_fusion_test.cc | 35 ++ tensorflow/compiler/xla/tests/tuple_test.cc | 3 +- 14 files changed, 370 insertions(+), 126 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index f0a9de5f94..b1e693da9d 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -45,6 +45,8 @@ using ::tensorflow::gtl::FlatMap; using ::tensorflow::gtl::FlatSet; using ::tensorflow::strings::Appendf; using ::tensorflow::strings::HumanReadableNumBytes; +using ::tensorflow::strings::Printf; +using ::tensorflow::strings::StrAppend; size_t BufferAllocation::Slice::Hasher::operator()(Slice s) const { uint64 h = std::hash()(s.index()); @@ -93,6 +95,9 @@ BufferAllocationProto BufferAllocation::ToProto() const { proto.set_color(color_.value()); if (is_entry_computation_parameter_) { proto.set_is_entry_computation_parameter(true); + for (int64 idx : param_shape_index()) { + proto.add_parameter_shape_index(idx); + } proto.set_parameter_number(parameter_number_); } proto.set_maybe_live_out(maybe_live_out_); @@ -112,25 +117,24 @@ BufferAllocationProto BufferAllocation::ToProto() const { string BufferAllocation::ToString() const { string output; - tensorflow::strings::StrAppend( - &output, tensorflow::strings::Printf("allocation %lld: %p, size %lld", - index_, this, size())); + Appendf(&output, "allocation %lld: %p, size %lld", index_, this, size()); if (color().value() != 0) { - tensorflow::strings::StrAppend(&output, ", color ", color().value()); + StrAppend(&output, ", color ", color().value()); } if (is_entry_computation_parameter()) { - tensorflow::strings::StrAppend(&output, ", parameter ", parameter_number()); + StrAppend(&output, ", parameter ", parameter_number(), " at ShapeIndex ", + param_shape_index().ToString()); } if (is_thread_local()) { - tensorflow::strings::StrAppend(&output, ", thread-local"); + StrAppend(&output, ", thread-local"); } if (maybe_live_out()) { - tensorflow::strings::StrAppend(&output, ", maybe-live-out"); + StrAppend(&output, ", maybe-live-out"); } if (IsPreallocatedTempBuffer()) { - tensorflow::strings::StrAppend(&output, ", preallocated-temp"); + StrAppend(&output, ", preallocated-temp"); } - tensorflow::strings::StrAppend(&output, ":\n"); + StrAppend(&output, ":\n"); // Dump the assigned buffers ordered by id. std::vector sorted_buffers; for (const auto& buffer_offset_size : assigned_buffers_) { @@ -142,12 +146,11 @@ string BufferAllocation::ToString() const { }); for (const LogicalBuffer* buffer : sorted_buffers) { const OffsetSize& offset_size = FindOrDie(assigned_buffers_, buffer); - tensorflow::strings::StrAppend( - &output, - tensorflow::strings::Printf( - " %s [%lld,%lld]: %s\n", buffer->ToString().c_str(), - offset_size.offset, offset_size.size, - ShapeUtil::HumanStringWithLayout(buffer->shape()).c_str())); + StrAppend(&output, + tensorflow::strings::Printf( + " %s [%lld,%lld]: %s\n", buffer->ToString().c_str(), + offset_size.offset, offset_size.size, + ShapeUtil::HumanStringWithLayout(buffer->shape()).c_str())); } return output; } @@ -840,7 +843,7 @@ Status BufferAssigner::AssignBuffersForComputation( /*is_thread_local=*/false, /*is_reusable=*/false); allocation->set_entry_computation_parameter( - instruction->parameter_number()); + instruction->parameter_number(), buffer->index()); VLOG(3) << "New allocation #" << allocation->index() << " for entry computation parameter: " << *buffer; continue; @@ -1411,14 +1414,17 @@ void BufferAssigner::AssignColocatedBufferSets( FlatSet* colocated_allocations) { for (const ColocatedBufferSet& colocated_buffer_set : colocated_buffer_sets) { BufferAllocation* allocation = nullptr; - // Set 'entry_parameter_number' if entry param in 'colocated_buffer_set'. + // Set 'entry_parameter_number' and 'entry_parameter_shape_idx' if entry + // param in 'colocated_buffer_set'. int64 entry_parameter_number = -1; + const ShapeIndex* entry_parameter_shape_idx = nullptr; for (const LogicalBuffer* buffer : colocated_buffer_set) { const HloInstruction* instruction = buffer->instruction(); const HloComputation* computation = instruction->parent(); if (instruction->opcode() == HloOpcode::kParameter && computation == computation->parent()->entry_computation()) { entry_parameter_number = instruction->parameter_number(); + entry_parameter_shape_idx = &buffer->index(); break; } } @@ -1439,7 +1445,8 @@ void BufferAssigner::AssignColocatedBufferSets( // body computation (which updates in place). // Set 'entry_computation_parameter' to indicate that it contains // an entry parameter, and to prevent reuse in MaybeAssignBuffer. - allocation->set_entry_computation_parameter(entry_parameter_number); + allocation->set_entry_computation_parameter( + entry_parameter_number, *entry_parameter_shape_idx); } colocated_allocations->insert(allocation->index()); } else { diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h index 65019b6b17..6b7fd0014d 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.h +++ b/tensorflow/compiler/xla/service/buffer_assignment.h @@ -91,6 +91,13 @@ class BufferAllocation { return parameter_number_; } + // If this allocation is for a parameter of the entry computation, this + // function returns which subshape of the parameter the allocation is for. + const ShapeIndex& param_shape_index() const { + CHECK(is_entry_computation_parameter_); + return param_shape_index_; + } + // Returns whether this allocation is assigned a LogicalBuffer which may // be live out of the entry computation. bool maybe_live_out() const { return maybe_live_out_; } @@ -203,9 +210,11 @@ class BufferAllocation { // Adds a LogicalBuffer to the set assigned to this buffer. void AddAssignment(const LogicalBuffer& buffer, int64 offset, int64 size); - void set_entry_computation_parameter(int64 parameter_number) { + void set_entry_computation_parameter(int64 parameter_number, + ShapeIndex param_shape_index) { is_entry_computation_parameter_ = true; parameter_number_ = parameter_number; + param_shape_index_ = std::move(param_shape_index); } void set_maybe_live_out(bool value) { maybe_live_out_ = value; } void set_index(Index index) { index_ = index; } @@ -235,6 +244,10 @@ class BufferAllocation { // indicates the index (starting from 0) of the parameter. int64 parameter_number_ = 0; + // If this buffer is for an entry computation parameter, which subshape of the + // parameter is it for? + ShapeIndex param_shape_index_; + // Whether the allocation contains a LogicalBuffer which may be live-out of // the entry computation. Note that this flag is conservatively computed by // TuplePointsToAnalysis. That is, an allocation marked `maybe_live_out_` diff --git a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc index ed78fef411..2029c303d4 100644 --- a/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc +++ b/tensorflow/compiler/xla/service/gpu/buffer_allocations.cc @@ -98,6 +98,14 @@ StatusOr> BufferAllocations::Builder::Build( } } + if (VLOG_IS_ON(2)) { + for (BufferAllocation::Index i = 0; i < num_buffers; ++i) { + const auto& buf = buffer_allocations->buffers_[i]; + VLOG(2) << "Buffer " << i << " -> " << buf.opaque() << " (" << buf.size() + << "B)"; + } + } + return std::move(buffer_allocations); } diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index f5d67b9ea9..623d6714de 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -262,9 +262,16 @@ StatusOr> GpuExecutable::ExecuteOnStream( ++i) { const BufferAllocation& allocation = assignment_->GetAllocation(i); if (allocation.is_entry_computation_parameter()) { - auto param_no = allocation.parameter_number(); - buffer_allocations_builder.RegisterBuffer( - i, arguments[param_no]->root_buffer()); + // The caller must give us a buffer for ShapeIndex {} of every parameter. + // It can optionally give us a buffer for other ShapeIndices, but we + // ignore them: Because we can't rely on these sub-buffers' addresses + // being available, our generated code can't use them. Instead, it must + // chase pointers starting at the tuple root. + if (allocation.param_shape_index().empty()) { + auto param_no = allocation.parameter_number(); + buffer_allocations_builder.RegisterBuffer( + i, arguments[param_no]->root_buffer()); + } } } se::StreamExecutor* executor = run_options->stream()->parent(); diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h index 1fe7970e7d..3d34311b43 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h @@ -66,13 +66,14 @@ class HloToIrBindings { } llvm::Value* GetTempBufferBase() const { return temp_buffer_base_; } + void SetTempBufferBase(llvm::Value* v) { temp_buffer_base_ = v; } // A helper method that returns the base pointer of the IrArray containing the // output of "inst".at the given ShapeIndex. llvm::Value* GetBasePointer(const HloInstruction& hlo, const ShapeIndex& shape_index = {}) const { auto it = base_ptrs_.find(&hlo); - CHECK(it != base_ptrs_.end()); + CHECK(it != base_ptrs_.end()) << hlo.ToString(); return it->second.element(shape_index); } @@ -113,7 +114,7 @@ class HloToIrBindings { std::unordered_map> base_ptrs_; // The address of the memory block that contains all temporary buffers. - llvm::Value* temp_buffer_base_; + llvm::Value* temp_buffer_base_ = nullptr; llvm_ir::AliasAnalysis alias_analysis_; }; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 7e20af3291..aa2a0a9800 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -75,6 +75,10 @@ namespace gpu { namespace { using llvm_ir::IrName; +using tensorflow::gtl::ArraySlice; +using tensorflow::gtl::nullopt; +using tensorflow::gtl::optional; +using tensorflow::strings::StrCat; // If a dimensions is smaller than this, untiled transposition may be more // efficient. @@ -137,6 +141,38 @@ void UpdateLaunchDimensions(const LaunchDimensions& launch_dims, Thunk* thunk, llvm::MDString::get(llvm_context, "reqntidx"), llvm::ConstantAsMetadata::get(threads_per_block_ir_value)})); } + +// Tries to get a Slice for the given instruction at the given index, but +// returns nullopt if we might not know the slice's address at runtime without +// dereferencing a containing tuple. +// +// In particular, when XLA accepts a parameter of tuple type, the caller has the +// option of telling XLA what are the values inside of the tuple, or just giving +// XLA a pointer to the top-level tuple and letting us chase the pointers on the +// GPU. We therefore cannot rely having these pointers to parameter sub-buffers +// being present when we run the program. +optional GetKnownAtRuntimeSlice( + const HloInstruction* instr, const ShapeIndex& index, + const BufferAssignment& buffer_assn) { + auto maybe_slice = buffer_assn.GetUniqueSlice(instr, index); + if (!maybe_slice.ok()) { + return nullopt; + } + // BufferAllocation gives a slice and alloc to every buffer accessed by XLA, + // but we don't necessarily know the runtime address of sub-buffers of input + // parameters. + const BufferAllocation::Slice& slice = maybe_slice.ValueOrDie(); + const BufferAllocation* alloc = slice.allocation(); + if (alloc->IsInputOrOutput() && !alloc->maybe_live_out() && + !alloc->param_shape_index().empty()) { + return nullopt; + } + + // Otherwise, we will know the address of this slice at runtime without having + // to dereference a tuple. + return slice; +} + } // namespace IrEmitterUnnested::IrEmitterUnnested(const HloModuleConfig& hlo_module_config, @@ -154,16 +190,20 @@ Status IrEmitterUnnested::Postprocess(HloInstruction* hlo) { } namespace { -bool ImplementedAsHostToDeviceMemcpy(const HloInstruction& hlo) { - // `hlo` needs to satisfy three conditions to be implemented as a +bool ImplementedAsHostToDeviceMemcpy(const BufferAssignment& buffer_assignment, + const HloInstruction& hlo) { + // `hlo` needs to satisfy the following conditions to be implemented as a // host-to-device cuMemcpy. // // 1. `hlo` is a kCopy instruction. // 2. `hlo`'s only operand is a kConstant instruction. // 3. `hlo` and its operand have the same shape (thus the same layout too). + // 4. The address of `hlo`'s buffer is known at runtime (without dereferencing + // pointers in a tuple). return hlo.opcode() == HloOpcode::kCopy && hlo.operand(0)->opcode() == HloOpcode::kConstant && - ShapeUtil::Equal(hlo.operand(0)->shape(), hlo.shape()); + ShapeUtil::Equal(hlo.operand(0)->shape(), hlo.shape()) && + GetKnownAtRuntimeSlice(&hlo, {}, buffer_assignment).has_value(); } bool ImplementedAsDeviceToDeviceMemcpy( @@ -177,13 +217,15 @@ bool ImplementedAsDeviceToDeviceMemcpy( // instance) which means the source buffer also resides on the device. return hlo.opcode() == HloOpcode::kCopy && ShapeUtil::Equal(hlo.operand(0)->shape(), hlo.shape()) && - buffer_assignment.HasTopLevelAllocation(hlo.operand(0)); + GetKnownAtRuntimeSlice(&hlo, {}, buffer_assignment).has_value() && + GetKnownAtRuntimeSlice(hlo.operand(0), {}, buffer_assignment) + .has_value(); } } // namespace llvm::Function* IrEmitterUnnested::BuildKernelPrototype( const HloInstruction& inst, - tensorflow::gtl::ArraySlice escaped_hlos) { + tensorflow::gtl::ArraySlice args) { // Compute the kernel name. The opcode string may contain "-" which cannot be // in a PTX function name, so sanitize the name before uniquifying it. string kernel_name = ir_emitter_context_->name_uniquer()->GetUniqueName( @@ -192,43 +234,32 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype( // Create the kernel and add it to the module. llvm::Module* module = ir_emitter_context_->llvm_module(); llvm::LLVMContext& context = module->getContext(); - int num_escaped_hlos = escaped_hlos.size(); llvm::FunctionType* kernel_type = llvm::FunctionType::get( /*Result=*/llvm::Type::getVoidTy(context), - std::vector(num_escaped_hlos + 1, - ir_builder_.getInt8PtrTy()), + std::vector(args.size(), ir_builder_.getInt8PtrTy()), /*isVarArg=*/false); llvm::Function* kernel = llvm::Function::Create(kernel_type, llvm::GlobalValue::ExternalLinkage, kernel_name.c_str(), module); - // Add dereferenceable information to each of the escaped HLO parameters. - for (size_t arg_no = 0; arg_no < escaped_hlos.size(); ++arg_no) { - const HloInstruction* escaped_hlo = escaped_hlos[arg_no]; - const Shape& escaped_hlo_shape = escaped_hlo->shape(); - int64 escaped_hlo_size = llvm_ir::ByteSizeOf( - escaped_hlo_shape, ir_emitter_context_->llvm_module()->getDataLayout()); - kernel->addDereferenceableAttr(arg_no + 1, escaped_hlo_size); - } - - // The last argument is a pointer to the temporary buffer memory block. - // We know that it doesn't alias any of the escaped arguments (the inputs + - // the result). We also know how many bytes can be dereferenced in it. - const llvm::Argument& temp_buffer = *std::prev(kernel->arg_end()); - int64 temp_buffer_arg_no = temp_buffer.getArgNo(); - int64 temp_allocation_total_size = - ir_emitter_context_->buffer_assignment().temp_allocation_total_size(); - if (temp_allocation_total_size != 0) { - kernel->addDereferenceableAttr(temp_buffer_arg_no + 1, - temp_allocation_total_size); - } - kernel->addParamAttr(temp_buffer_arg_no, llvm::Attribute::NoAlias); + // Add dereferenceable and alignment information to each of the kernel's + // parameters. + auto arg_it = kernel->arg_begin(); + for (size_t arg_no = 0; arg_no < args.size(); ++arg_no) { + const BufferAllocation* alloc = args[arg_no]; + llvm::Argument* fn_arg = &*arg_it; + ++arg_it; - // All arguments to a kernel must be aligned to kCudaMallocAlignBytes. - for (int64 i = 0; i < kernel->arg_size(); ++i) { + kernel->addDereferenceableAttr(arg_no + 1, alloc->size()); kernel->addParamAttr( - i, llvm::Attribute::get(context, llvm::Attribute::Alignment, - kCudaMallocAlignBytes)); + arg_no, llvm::Attribute::get(context, llvm::Attribute::Alignment, + kCudaMallocAlignBytes)); + + if (alloc->IsPreallocatedTempBuffer()) { + fn_arg->setName("temp_buf"); + } else { + fn_arg->setName(llvm_ir::AsStringRef(StrCat("alloc", alloc->index()))); + } } // TODO(b/65380986): Investigate if adding fast math flags for generated @@ -245,10 +276,9 @@ llvm::Function* IrEmitterUnnested::BuildKernelPrototype( // Update the insert point to the entry basic block. llvm::BasicBlock* entry_bb = - llvm::BasicBlock::Create(context, - "entry", // The name of the basic block. - kernel); // The parent/owner of "entry_bb". - // Emit a "return void" at entry_bb's end, and sets the insert point before + llvm::BasicBlock::Create(context, /*Name=*/"entry", /*Parent=*/kernel); + + // Emit a "return void" at entry_bb's end, and set the insert point before // that return instruction. ir_builder_.SetInsertPoint(llvm::ReturnInst::Create(context, entry_bb)); @@ -869,7 +899,8 @@ int64 EmitTranspose021Tiled(llvm_ir::IrArray input, llvm_ir::IrArray output, } // namespace Status IrEmitterUnnested::HandleCopy(HloInstruction* copy) { - if (ImplementedAsHostToDeviceMemcpy(*copy)) { + if (ImplementedAsHostToDeviceMemcpy(ir_emitter_context_->buffer_assignment(), + *copy)) { thunk_sequence_->emplace_back(BuildHostToDeviceCopyThunk(copy)); return Status::OK(); } @@ -1931,62 +1962,202 @@ Status IrEmitterUnnested::HandleInfeed(HloInstruction* infeed) { return Status::OK(); } -llvm::Function* IrEmitterUnnested::EmitBasePointersForHloAndItsOperands( - const HloInstruction& hlo, std::vector* io_hlos) { - const BufferAssignment& buffer_assignment = - ir_emitter_context_->buffer_assignment(); - // GetTupleElement instructions are implemented by emitting IR that indexes - // and loads the target tuple element pointer from its operand (possibly - // recursively). For this reason, GetTupleElement instructions are associated - // with their operand buffer in 'io_hlos' and 'non_io_hlos' below. - std::vector non_io_hlos; - for (const HloInstruction* operand : hlo.operands()) { - const HloInstruction* to_lookup = operand->LatestNonGteAncestor(); - if (buffer_assignment.HasTopLevelAllocation(to_lookup) && - buffer_assignment.GetUniqueTopLevelSlice(to_lookup) - .ConsumeValueOrDie() - .allocation() - ->IsInputOrOutput()) { - io_hlos->push_back(operand); - } else { - non_io_hlos.push_back(operand); +// Figures out how to access the buffers for all subshapes of hlo's operands and +// for hlo itself (i.e. all the buffers produced by HLO). +// +// Returns a map keyed on the pair {HloInstruction, ShapeIndex}. The value for +// this key is a pair {Slice, ShapeIndex}, where the slice tells you the root +// buffer to look in, and the ShapeIndex describes how to dereference starting +// at that buffer to get to the buffer in question. +// +// For example, if {hlo, {1}} is mapped to {slice, {3, 4}}, then the buffer for +// hlo at ShapeIndex {1} (i.e. the buffer for the second tuple element of hlo) +// is found at slice[3][4]. That is, slice is a void***, which we dereference +// twice -- first at index 3, and then at index 4 -- to get the address of our +// buffer. +// +// This function conservatively assumes that we'll touch all sub-buffers of +// every operand and of the output. +static std::map, + std::pair> +GetHloBufferSlices(const HloInstruction* hlo, + const BufferAssignment& buffer_assn) { + std::map, + std::pair> + slices; + + // Tries to find a slice plus an array of indices i1, ..., iN such that the + // sub-buffer for instr at index can be found at slice[i1]...[iN]. + auto find_slice_for = [&](const HloInstruction* instr, + const ShapeIndex& index) + -> optional> { + // Simple, common case: Is the buffer for instr known at runtime? If so, + // we're done. + auto slice = GetKnownAtRuntimeSlice(instr, index, buffer_assn); + if (slice.has_value()) { + return {{*slice, ShapeIndex()}}; } - } - CHECK_NE(HloOpcode::kGetTupleElement, hlo.opcode()); - if (buffer_assignment.HasTopLevelAllocation(&hlo) && - buffer_assignment.GetUniqueTopLevelSlice(&hlo) - .ConsumeValueOrDie() - .allocation() - ->IsInputOrOutput()) { - io_hlos->push_back(&hlo); - } else { - non_io_hlos.push_back(&hlo); + // If we don't know the buffer for instr at index, see if we know the buffer + // for instr at index without its last element. If so, we can dynamically + // find the buffer for instr by dereferencing a pointer in that buffer. + // Continue looking this way until we run out of elements in 'index'. + ShapeIndex new_index = index; + ShapeIndex gte_indices; + while (!new_index.empty()) { + gte_indices.push_front(new_index.back()); + new_index.pop_back(); + auto slice = GetKnownAtRuntimeSlice(instr, new_index, buffer_assn); + if (slice.has_value()) { + return {{*slice, gte_indices}}; + } + } + + // If *that* didn't work, check whether instr is a GTE instruction. If it + // is, see if we can get a buffer for its parent, and continue walking up + // parents until we find a defined buffer or we hit something that's not a + // GTE. + const HloInstruction* parent = instr; + while (parent->opcode() == HloOpcode::kGetTupleElement) { + gte_indices.push_front(parent->tuple_index()); + parent = parent->operand(0); + + auto slice = GetKnownAtRuntimeSlice(parent, {}, buffer_assn); + if (slice.has_value()) { + return {{*slice, gte_indices}}; + } + } + + return nullopt; + }; + + // Adds entries for all subshapes of instr to `slices`. + auto add_slices_for = [&](const HloInstruction* instr) { + // GPU constants don't have buffers; don't bother looking for one. + if (instr->IsConstant()) { + return; + } + + ShapeUtil::ForEachSubshape( + instr->shape(), [&](const Shape& /*shape*/, const ShapeIndex& index) { + if (slices.count({instr, index})) { + // HLOs can have duplicate operands; don't bother redoing work. + return; + } + auto maybe_slice = find_slice_for(instr, index); + if (maybe_slice.has_value()) { + slices[{instr, index}] = *maybe_slice; + } else { + VLOG(1) << "Couldn't find buffer for " << instr->ToString() + << " at index " << index.ToString(); + } + }); + }; + + add_slices_for(hlo); + for (const HloInstruction* operand : hlo->operands()) { + // Conservatively assume we'll need the buffers for all subshapes of the + // operand. + add_slices_for(operand); } - llvm::Function* kernel = BuildKernelPrototype(hlo, *io_hlos); - // bindings_ is reused because the bindings of kConstant to their underlying - // llvm::Constant can be shared for all HLOs in this computation. - bindings_.EmitBasePointersForHlos(*io_hlos, non_io_hlos); - return kernel; + return slices; } std::unique_ptr IrEmitterUnnested::BuildKernelThunk( const HloInstruction* inst) { - std::vector io_hlos; - llvm::Function* kernel = - EmitBasePointersForHloAndItsOperands(*inst, &io_hlos); + const BufferAssignment& buffer_assn = + ir_emitter_context_->buffer_assignment(); - // Compute the input buffer indices. - std::vector io_buffers; - io_buffers.reserve(io_hlos.size()); - for (const HloInstruction* io_hlo : io_hlos) { - io_buffers.push_back(GetAllocationSlice(*io_hlo->LatestNonGteAncestor())); + std::map, + std::pair> + hlo_slices = GetHloBufferSlices(inst, buffer_assn); + + // Figure out which buffer allocations need to be passed as arguments to our + // kernel. This is simply all of the allocations referenced in hlo_slices, + // plus the XLA temp buffer (if we have it). We always include the temp + // buffer because even if the kernel itself doesn't use it, a nested + // subcomputation within the kernel (e.g. a kMap's computation) might. + std::unordered_set buffers_needed; + for (const auto& kv : hlo_slices) { + buffers_needed.insert(kv.second.first.allocation()); + } + tensorflow::gtl::optional temp_buffer; + for (const BufferAllocation& alloc : buffer_assn.Allocations()) { + if (alloc.IsPreallocatedTempBuffer()) { + if (!temp_buffer.has_value()) { + temp_buffer = &alloc; + } else { + LOG(FATAL) << "Multiple temp buffers found, but only one is allowed!"; + } + } + } + if (temp_buffer.has_value()) { + buffers_needed.insert(*temp_buffer); + } + + // We'll pass a pointer to each of the elements of `buffers` to our kernel, in + // this order. + std::vector buffers(buffers_needed.begin(), + buffers_needed.end()); + std::sort(buffers.begin(), buffers.end(), + [](const BufferAllocation* a, const BufferAllocation* b) { + return a->index() < b->index(); + }); + + llvm::Function* kernel = BuildKernelPrototype(*inst, buffers); + + // Build a map from a BufferAllocation to the corresponding argument in our + // kernel. + std::unordered_map kernel_args; + { + auto arg_it = kernel->arg_begin(); + auto buffers_it = buffers.begin(); + for (; arg_it != kernel->arg_end(); ++arg_it, ++buffers_it) { + kernel_args[*buffers_it] = arg_it; + } + } + + // For each buffer our kernel might want to touch, bind it to a value derived + // from our kernel args. + for (const auto& kv : hlo_slices) { + const HloInstruction* instr = kv.first.first; + const ShapeIndex& index = kv.first.second; + const BufferAllocation::Slice& slice = kv.second.first; + const ShapeIndex& gte_index = kv.second.second; + + VLOG(3) << "Buffer for " << instr->ToString() << " at " << index.ToString() + << " is found in slice " << slice.ToString() << " at GTE index " + << gte_index.ToString(); + + llvm::Value* loc = + ir_builder_.CreateInBoundsGEP(kernel_args.at(slice.allocation()), + {ir_builder_.getInt64(slice.offset())}); + + // If gte_index is nonempty, we have to dereference `loc` to get to the + // value we're ultimately interested in. + llvm::Type* int8_double_pointer = + llvm::PointerType::get(ir_builder_.getInt8PtrTy(), /*AddressSpace=*/0); + for (int64 idx : gte_index) { + loc = ir_builder_.CreateBitCast(loc, int8_double_pointer); + loc = ir_builder_.CreateLoad( + ir_builder_.CreateInBoundsGEP(loc, {ir_builder_.getInt64(idx)})); + } + + bindings_.BindHloToIrValue(*instr, loc, index); + } + + // Bind the temp buffer so that nested subcomputations can find it if they + // need. + if (temp_buffer.has_value()) { + bindings_.SetTempBufferBase(kernel_args.at(*temp_buffer)); + } else { + bindings_.SetTempBufferBase( + llvm::ConstantPointerNull::get(ir_builder_.getInt8PtrTy())); } - // Create a KernelThunk that launches the kernel that implements "inst". - return MakeUnique(io_buffers, - llvm_ir::AsString(kernel->getName()), inst); + return MakeUnique(buffers, llvm_ir::AsString(kernel->getName()), + inst); } std::unique_ptr IrEmitterUnnested::BuildHostToDeviceCopyThunk( diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 56ab8208ce..688760efbd 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -93,14 +93,10 @@ class IrEmitterUnnested : public IrEmitter { std::unique_ptr BuildThunk(const HloInstruction* hlo); // Builds the prototype of the IR kernel for `inst` and adds it to the module. + // This kernel takes as arguments pointers to the given buffer allocations. llvm::Function* BuildKernelPrototype( const HloInstruction& inst, - tensorflow::gtl::ArraySlice escaped_hlos); - - // Emits the base pointers for `hlo` and its operands. `io_hlos` will store - // all input/output HLOs among `hlo` and its operands. - llvm::Function* EmitBasePointersForHloAndItsOperands( - const HloInstruction& hlo, std::vector* io_hlos); + tensorflow::gtl::ArraySlice args); // EmitColumnReduction and EmitRowReduction emit code for column and row // reduction of a matrix and/or 3D tensor. Row and column reduction have diff --git a/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc b/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc index 9660699369..c20a781a33 100644 --- a/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc @@ -29,10 +29,10 @@ namespace xla { namespace gpu { KernelThunk::KernelThunk( - tensorflow::gtl::ArraySlice io_buffers, + tensorflow::gtl::ArraySlice args, const string& kernel_name, const HloInstruction* hlo_instruction) : Thunk(Kind::kKernel, hlo_instruction), - io_buffers_(io_buffers.begin(), io_buffers.end()), + args_(args.begin(), args.end()), kernel_name_(kernel_name) {} tensorflow::Status KernelThunk::Initialize(const GpuExecutable& executable) { @@ -42,7 +42,7 @@ tensorflow::Status KernelThunk::Initialize(const GpuExecutable& executable) { return tensorflow::Status::OK(); } - loader_spec_.reset(new se::MultiKernelLoaderSpec(io_buffers_.size() + 1)); + loader_spec_.reset(new se::MultiKernelLoaderSpec(args_.size())); tensorflow::StringPiece ptx = executable.ptx(); // Convert tensorflow::StringPiece to se::port::StringPiece because // StreamExecutor uses the latter. @@ -81,15 +81,16 @@ tensorflow::Status KernelThunk::ExecuteOnStream( kernel = &it->second; } + VLOG(3) << "Launching " << kernel->name(); // Launch the kernel with potentially multiple blocks and threads. static constexpr int kKernelArgsLimit = 1024; auto kernel_args = MakeUnique>(); - for (const BufferAllocation::Slice io_buffer : io_buffers_) { - kernel_args->add_device_memory_argument( - buffer_allocations.GetDeviceAddress(io_buffer)); + for (const BufferAllocation* arg : args_) { + const auto& buf = buffer_allocations.GetDeviceAddress(arg->index()); + kernel_args->add_device_memory_argument(buf); + VLOG(3) << " Arg: alloc #" << arg->index() << ": " << buf.opaque() << " (" + << buf.size() << "B)"; } - kernel_args->add_device_memory_argument( - buffer_allocations.GetTempBufferBase()); if (!stream->parent()->Launch( stream, se::ThreadDim(launch_dimensions.threads_per_block()), se::BlockDim(launch_dimensions.block_count()), *kernel, diff --git a/tensorflow/compiler/xla/service/gpu/kernel_thunk.h b/tensorflow/compiler/xla/service/gpu/kernel_thunk.h index 350b5aaf36..9ae455e2fc 100644 --- a/tensorflow/compiler/xla/service/gpu/kernel_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/kernel_thunk.h @@ -46,7 +46,7 @@ class KernelThunk : public Thunk { // Constructs a thunk for the given kernel. // // `hlo_instruction` is as in Thunk. Other arguments are as the class members. - KernelThunk(tensorflow::gtl::ArraySlice io_buffers, + KernelThunk(tensorflow::gtl::ArraySlice args, const string& kernel_name, const HloInstruction* hlo_instruction); KernelThunk(const KernelThunk&) = delete; KernelThunk& operator=(const KernelThunk&) = delete; @@ -63,8 +63,8 @@ class KernelThunk : public Thunk { perftools::gputools::Stream* stream) override; private: - // The indices of the input/output buffers. - const std::vector io_buffers_; + // Buffers passed to the kernel as arguments. + const std::vector args_; // Entry kernel name for the computation. const string kernel_name_; diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 0e9a852788..36db711c6c 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -200,6 +200,7 @@ message BufferAllocationProto { bool is_reusable = 4; bool is_entry_computation_parameter = 5; int64 parameter_number = 6; + repeated int64 parameter_shape_index = 10; bool maybe_live_out = 7; int64 color = 8; repeated Assigned assigned = 9; diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index d8a00880e9..19b1aa93bd 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -63,6 +63,9 @@ class ShapeIndex { void push_back(int64 value) { indices_.push_back(value); } void pop_back() { indices_.pop_back(); } + // push_front is O(n^2), but shapes don't usually have a ton of dimensions. + void push_front(int64 value) { indices_.insert(indices_.begin(), value); } + std::vector::const_iterator begin() const { return indices_.begin(); } std::vector::const_iterator end() const { return indices_.end(); } std::vector::iterator begin() { return indices_.begin(); } diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 60f3e61807..5ff7740752 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1592,6 +1592,7 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_runner", "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", @@ -1618,6 +1619,7 @@ xla_test( "//tensorflow/compiler/xla/client:computation_builder", "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_runner", "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:hlo_test_base", diff --git a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc index 6e6cb7ff1e..0a603f4954 100644 --- a/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc +++ b/tensorflow/compiler/xla/tests/multioutput_fusion_test.cc @@ -28,6 +28,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_runner.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" @@ -35,6 +36,7 @@ limitations under the License. #include "tensorflow/compiler/xla/tests/test_macros.h" #include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/protobuf.h" @@ -176,5 +178,38 @@ XLA_TEST_F(MultiOutputFusionTest, 2DFusionSize129) { RunTest2D(true, 129); } XLA_TEST_F(MultiOutputFusionTest, DiffentTypesNoFusion) { RunTest1D(false, 8); } XLA_TEST_F(MultiOutputFusionTest, DiffentTypesFusion) { RunTest1D(true, 8); } +XLA_TEST_F(MultiOutputFusionTest, FusionNodeIsRoot) { + const char* testcase = R"( + HloModule m + + fused_computation { + x.param_0 = (((s32[]), f32[]), (f32[], s32[])) parameter(0) + gte.3 = ((s32[]), f32[]) get-tuple-element(x.param_0), index=0 + gte.2 = (s32[]) get-tuple-element(gte.3), index=0 + gte.4 = s32[] get-tuple-element(gte.2), index=0 + copy = s32[] copy(gte.4) + ROOT tuple = (s32[]) tuple(copy) + } + + ENTRY thing.v3 { + x = (((s32[]), f32[]), (f32[], s32[])) parameter(0) + ROOT fusion = (s32[]) fusion(x), kind=kLoop, calls=fused_computation + } + )"; + auto module = + HloRunner::CreateModuleFromString(testcase, GetDebugOptionsForTest()) + .ValueOrDie(); + auto param = Literal::MakeTupleOwned( + Literal::MakeTupleOwned( + Literal::MakeTupleOwned(Literal::CreateR0(42)), + Literal::CreateR0(1.0)), + Literal::MakeTupleOwned(Literal::CreateR0(3.0), + Literal::CreateR0(4))); + TF_ASSERT_OK_AND_ASSIGN(auto result, + Execute(std::move(module), {param.get()})); + EXPECT_TRUE(LiteralTestUtil::Equal( + *result, *Literal::MakeTupleOwned(Literal::CreateR0(42)))); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/tuple_test.cc b/tensorflow/compiler/xla/tests/tuple_test.cc index a8bca70d85..2029312f94 100644 --- a/tensorflow/compiler/xla/tests/tuple_test.cc +++ b/tensorflow/compiler/xla/tests/tuple_test.cc @@ -194,8 +194,7 @@ XLA_TEST_F(TupleTest, TupleGTEToTuple) { ComputeAndCompareTuple(&builder, *expected, {}, error_spec_); } -// TODO(b/68395210): GPU does not tolerate ambiguous top-level buffers. -XLA_TEST_F(TupleTest, DISABLED_ON_GPU(SelectBetweenPredTuples)) { +XLA_TEST_F(TupleTest, SelectBetweenPredTuples) { ComputationBuilder b(client_, TestName()); ComputationDataHandle v1, v2; -- GitLab From 3d803b9421401c5118ec29d82dbeb1808d25f3b3 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 13 Feb 2018 17:16:34 -0800 Subject: [PATCH 152/629] Lazily reads from resource variables in eager mode. PiperOrigin-RevId: 185614070 --- tensorflow/python/eager/function.py | 6 ++ tensorflow/python/layers/base.py | 2 + .../python/ops/resource_variable_ops.py | 98 ++++++++++++++----- tensorflow/python/ops/state_ops.py | 8 +- 4 files changed, 87 insertions(+), 27 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index d352d67523..28f5289ffc 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -593,10 +593,16 @@ def _defun_internal(name, func, args, kwds): with tmp_graph.as_default(): func_inputs = _get_defun_inputs(args) + def convert(x): + if x is None: + return None + return ops.convert_to_tensor_or_indexed_slices(x) + with capture_tensors(captures): this_tape = tape.push_new_tape() try: func_outputs = func(*func_inputs, **kwds) + func_outputs = nest.map_structure(convert, func_outputs) finally: tape.pop_tape(this_tape) variables = this_tape.watched_variables() diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 38b75e6d3c..8314c4aa87 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -263,6 +263,8 @@ class Layer(object): return # Updates already applied when in eager mode. updates = _to_list(updates) + updates = [x if isinstance(x, ops.Operation) + else ops.convert_to_tensor(x) for x in updates] self._updates += updates if inputs is None: for u in updates: diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 11f452fa46..9ab789abad 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -783,38 +783,38 @@ class ResourceVariable(variables.Variable): # TODO(apassos): this here and below is not atomic. Consider making it # atomic if there's a way to do so without a performance cost for those who # don't need it. - with ops.control_dependencies([ - gen_resource_variable_ops.assign_sub_variable_op( - self.handle, - ops.convert_to_tensor(delta, dtype=self.dtype), - name=name) - ]): - return self.read_value() + return self._lazy_read(gen_resource_variable_ops.assign_sub_variable_op( + self.handle, + ops.convert_to_tensor(delta, dtype=self.dtype), + name=name)) def assign_add(self, delta, use_locking=None, name=None): - with ops.control_dependencies([ - gen_resource_variable_ops.assign_add_variable_op( - self.handle, - ops.convert_to_tensor(delta, dtype=self.dtype), - name=name) - ]): - return self.read_value() + return self._lazy_read(gen_resource_variable_ops.assign_add_variable_op( + self.handle, + ops.convert_to_tensor(delta, dtype=self.dtype), + name=name)) + + def _lazy_read(self, op): + if hasattr(self, "_trainable") and self._trainable: + tape.watch_variable(self) + return _UnreadVariable( + self._handle, self.dtype, self._handle_device, self._shape, + self._in_graph_mode, + self._handle_deleter if not self._in_graph_mode else None, op) def assign(self, value, use_locking=None, name=None): value_tensor = ops.convert_to_tensor(value, dtype=self.dtype) self._shape.assert_is_compatible_with(value_tensor.shape) - with ops.control_dependencies([ + return self._lazy_read( gen_resource_variable_ops.assign_variable_op( self.handle, value_tensor, - name=name) - ]): - return self.read_value() + name=name)) def _strided_slice_assign(self, begin, end, strides, value, name, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask): - with ops.control_dependencies([ + return self._lazy_read( gen_array_ops.resource_strided_slice_assign( ref=self.handle, begin=begin, @@ -826,9 +826,12 @@ class ResourceVariable(variables.Variable): end_mask=end_mask, ellipsis_mask=ellipsis_mask, new_axis_mask=new_axis_mask, - shrink_axis_mask=shrink_axis_mask) - ]): - return self.value() + shrink_axis_mask=shrink_axis_mask)) + + def __int__(self): + if self.dtype != dtypes.int32 and self.dtype != dtypes.int64: + raise TypeError("Non-integer variable can't be converted to integer.") + return int(self.value().numpy()) def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False): del name @@ -886,6 +889,57 @@ def _dense_var_to_tensor(var, dtype=None, name=None, as_ref=False): return var._dense_var_to_tensor(dtype=dtype, name=name, as_ref=as_ref) # pylint: disable=protected-access +class _UnreadVariable(ResourceVariable): + """Represents a future for a read of a variable. + + Pretends to be the tensor if anyone looks. + """ + + def __init__(self, handle, dtype, handle_device, # pylint: disable=super-init-not-called + shape, in_graph_mode, deleter, parent_op): + # We do not call super init on purpose. + self._trainable = False + self._save_slice_info = None + self._graph_key = ops.get_default_graph()._graph_key # pylint: disable=protected-access + self._in_graph_mode = in_graph_mode + self._handle = handle + self._handle_device = handle_device + self._shape = shape + self._initial_value = None + if isinstance(self._handle, ops.EagerTensor): + self._handle_name = "" + else: + self._handle_name = self._handle.name + self._dtype = dtype + self._constraint = None + self._cached_value = None + self._is_initialized_op = None + self._initializer_op = None + self._parent_op = parent_op + if context.in_graph_mode(): + self._graph_element = self.read_value() + else: + self._graph_element = None + self._handle_deleter = deleter + + def value(self): + return self._read_variable_op() + + def read_value(self): + return self._read_variable_op() + + def _read_variable_op(self): + with ops.control_dependencies([self._parent_op]): + return gen_resource_variable_ops.read_variable_op(self._handle, + self._dtype) + + def op(self): + """The op for this variable.""" + return self._parent_op + +ops.register_tensor_conversion_function(_UnreadVariable, _dense_var_to_tensor) +ops.register_dense_tensor_like_type(_UnreadVariable) + # Register a conversion function which reads the value of the variable, # allowing instances of the class to be used as tensors. diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index 3cc76fdbf3..1323df5a17 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -353,11 +353,9 @@ def scatter_update(ref, indices, updates, use_locking=True, name=None): if ref.dtype._is_ref_dtype: return gen_state_ops.scatter_update(ref, indices, updates, use_locking=use_locking, name=name) - with ops.control_dependencies( - [gen_resource_variable_ops.resource_scatter_update( - ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype), - name=name)]): - return ref.read_value() + return ref._lazy_read(gen_resource_variable_ops.resource_scatter_update( # pylint: disable=protected-access + ref.handle, indices, ops.convert_to_tensor(updates, ref.dtype), + name=name)) @tf_export("scatter_nd_update") -- GitLab From d0f4faeb2de8843d6996cba96c70af29feba3876 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 13 Feb 2018 17:26:18 -0800 Subject: [PATCH 153/629] Don't release kInvalidHandle. Also added a little more debug information. PiperOrigin-RevId: 185615169 --- .../core/common_runtime/process_function_library_runtime.cc | 2 +- tensorflow/core/kernels/data/captured_function.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index f9d9633bee..e205e34aa0 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -246,7 +246,7 @@ Status ProcessFunctionLibraryRuntime::ReleaseHandle( string target_device; { mutex_lock l(mu_); - CHECK_EQ(1, function_data_.count(handle)); + CHECK_EQ(1, function_data_.count(handle)) << " handle: " << handle; target_device = function_data_[handle].target_device; } flr = GetFLR(target_device); diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc index f248f7897f..c4aa9ec265 100644 --- a/tensorflow/core/kernels/data/captured_function.cc +++ b/tensorflow/core/kernels/data/captured_function.cc @@ -33,7 +33,7 @@ Status CapturedFunction::Create( } CapturedFunction::~CapturedFunction() { - if (lib_ != nullptr) { + if (lib_ != nullptr && f_handle_ != kInvalidHandle) { lib_->ReleaseHandle(f_handle_).IgnoreError(); } } -- GitLab From 8742df3a115e0714fb25fb7ae199de93be205ab0 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Tue, 13 Feb 2018 17:52:50 -0800 Subject: [PATCH 154/629] TensorBoard debugger plugin: SIGINT handler for easier termination of debugged runtime from TensorBoardDebugWrapperSession and TensorBoardDebugHook. PiperOrigin-RevId: 185617989 --- .../python/debug/wrappers/grpc_wrapper.py | 27 +++++++++++++++++++ tensorflow/python/debug/wrappers/hooks.py | 1 + 2 files changed, 28 insertions(+) diff --git a/tensorflow/python/debug/wrappers/grpc_wrapper.py b/tensorflow/python/debug/wrappers/grpc_wrapper.py index 74d7c2b9e2..fb9494f576 100644 --- a/tensorflow/python/debug/wrappers/grpc_wrapper.py +++ b/tensorflow/python/debug/wrappers/grpc_wrapper.py @@ -17,6 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import signal +import sys import traceback # Google-internal import(s). @@ -137,6 +139,29 @@ class GrpcDebugWrapperSession(framework.NonInteractiveDebugWrapperSession): if not address.startswith(common.GRPC_URL_PREFIX) else address) +def _signal_handler(unused_signal, unused_frame): + try: + input_func = raw_input + except NameError: + # Python 3 does not have raw_input. + input_func = input + + while True: + response = input_func("\nSIGINT received. Quit program? (Y/n): ").strip() + if response in ("", "Y", "y"): + sys.exit(0) + elif response in ("N", "n"): + break + + +def register_signal_handler(): + try: + signal.signal(signal.SIGINT, _signal_handler) + except ValueError: + # This can happen if we are not in the MainThread. + pass + + class TensorBoardDebugWrapperSession(GrpcDebugWrapperSession): """A tfdbg Session wrapper that can be used with TensorBoard Debugger Plugin. @@ -185,6 +210,8 @@ class TensorBoardDebugWrapperSession(GrpcDebugWrapperSession): # sent to the debug servers. self._sent_graph_version = -1 + register_signal_handler() + def run(self, fetches, feed_dict=None, diff --git a/tensorflow/python/debug/wrappers/hooks.py b/tensorflow/python/debug/wrappers/hooks.py index 0204254cca..6705cd31e2 100644 --- a/tensorflow/python/debug/wrappers/hooks.py +++ b/tensorflow/python/debug/wrappers/hooks.py @@ -345,6 +345,7 @@ class TensorBoardDebugHook(GrpcDebugHook): self._grpc_debug_server_addresses = grpc_debug_server_addresses self._send_traceback_and_source_code = send_traceback_and_source_code self._sent_graph_version = -1 + grpc_wrapper.register_signal_handler() def before_run(self, run_context): if self._send_traceback_and_source_code: -- GitLab From e5840b71a2199ec4b1f04281a7c45cbb4157c510 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 17:53:13 -0800 Subject: [PATCH 155/629] Internal Change. PiperOrigin-RevId: 185618034 --- tensorflow/SECURITY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/SECURITY.md b/tensorflow/SECURITY.md index 074eed2951..6ddac1f964 100644 --- a/tensorflow/SECURITY.md +++ b/tensorflow/SECURITY.md @@ -235,5 +235,5 @@ v//Fw6ZeY+HmRDFdirjD7wXtIuER4vqCryIqR6Xe9X8oJXz9L/Jhslc= | Type | Versions affected | Reported by | Additional Information | |------|:-----------------:|---------------------------------------| -| out of bounds read| <=1.4 | @zhangbo5891001 | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | +| out of bounds read| <=1.4 | TenCent Blade Team | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | -- GitLab From 7325919e07e2ae45b3b5436db1dc9f26a51af6c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 13 Feb 2018 18:57:53 -0800 Subject: [PATCH 156/629] Automated g4 rollback of changelist 185598764 PiperOrigin-RevId: 185623948 --- .../compiler/xla/service/copy_insertion.cc | 3 +- .../compiler/xla/service/heap_simulator.cc | 1 - .../compiler/xla/service/hlo_computation.cc | 7 +- .../compiler/xla/service/hlo_computation.h | 8 - tensorflow/compiler/xla/service/hlo_module.cc | 15 -- .../compiler/xla/service/hlo_ordering.cc | 16 -- .../xla/service/hlo_rematerialization.cc | 1 - .../compiler/xla/service/layout_assignment.cc | 212 ++++++------------ .../xla/service/layout_assignment_test.cc | 63 ------ 9 files changed, 75 insertions(+), 251 deletions(-) diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index c812df4235..cd983bc03e 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -729,8 +729,7 @@ class CopyRemover { // has a different operand (the operand of the elided copy). for (const HloUse* copy_use : copy_value_node->uses) { operand_node->uses.push_back(copy_use); - if (copy_use->instruction->opcode() == HloOpcode::kCopy && - ContainsKey(copy_map_, copy_use->instruction)) { + if (copy_use->instruction->opcode() == HloOpcode::kCopy) { copy_map_.at(copy_use->instruction).src = operand_node; } } diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc index a2d13c013c..cde5877e29 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.cc +++ b/tensorflow/compiler/xla/service/heap_simulator.cc @@ -225,7 +225,6 @@ Status HeapSimulator::RunComputation( // sub-computations will never be run concurrently. if (module_sequence_ != nullptr) { if (instruction->opcode() == HloOpcode::kCall || - instruction->opcode() == HloOpcode::kConditional || instruction->opcode() == HloOpcode::kWhile) { for (const HloComputation* called_computation : instruction->called_computations()) { diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 21e6b2ca73..5432419e4a 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -509,14 +509,13 @@ StatusOr HloComputation::DeepCopyInstruction( "Can't deep copy instruction %s: instruction is not in computation %s", instruction->name().c_str(), name().c_str()); } + if (indices_to_copy != nullptr && !ShapeUtil::Compatible(instruction->shape(), indices_to_copy->shape())) { return FailedPrecondition( "Can't deep copy instruction %s: given shape tree of indices to copy " - "has incompatible shapes: %s vs. %s", - instruction->name().c_str(), - ShapeUtil::HumanString(instruction->shape()).c_str(), - ShapeUtil::HumanString(indices_to_copy->shape()).c_str()); + "has incompatible shape", + instruction->name().c_str()); } ShapeIndex index; diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 39d864efcb..061c59abe5 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -77,14 +77,6 @@ class HloComputation { return last_added_instruction_; } - Status ForEachInstruction( - const std::function& func) const { - for (const auto& instruction : instructions_) { - TF_RETURN_IF_ERROR(func(instruction.get())); - } - return Status::OK(); - } - private: const string name_; HloInstruction* last_added_instruction_; diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 1e0e17f22f..60270b0595 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -145,21 +145,6 @@ void HloModule::ReplaceComputations( } break; } - case HloOpcode::kConditional: { - HloComputation* new_true_computation = - tensorflow::gtl::FindWithDefault( - replacements, instruction->true_computation(), nullptr); - if (new_true_computation != nullptr) { - instruction->set_true_computation(new_true_computation); - } - HloComputation* new_false_computation = - tensorflow::gtl::FindWithDefault( - replacements, instruction->false_computation(), nullptr); - if (new_false_computation != nullptr) { - instruction->set_false_computation(new_false_computation); - } - break; - } case HloOpcode::kSelectAndScatter: { HloComputation* new_select = tensorflow::gtl::FindWithDefault( replacements, instruction->select(), nullptr); diff --git a/tensorflow/compiler/xla/service/hlo_ordering.cc b/tensorflow/compiler/xla/service/hlo_ordering.cc index 1b24d8da9e..68e3c9618c 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering.cc @@ -186,22 +186,6 @@ bool HloOrdering::UseIsBeforeValueDefinition( } } - if (use.instruction->opcode() == HloOpcode::kConditional) { - const HloInstruction* conditional = use.instruction; - if (call_graph_->InstructionIsNestedIn(value.defining_instruction(), - conditional->true_computation())) { - VLOG(4) << " use is conditional " << use.instruction->name() - << " and def is in TRUE computation"; - return true; - } - if (call_graph_->InstructionIsNestedIn(value.defining_instruction(), - conditional->false_computation())) { - VLOG(4) << " use is conditional " << use.instruction->name() - << " and def is in FALSE computation"; - return true; - } - } - VLOG(4) << " use is not before value"; return false; } diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 98b8d34be1..c6b4dc0368 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -60,7 +60,6 @@ bool IsRematerializable(const HloInstruction* instruction) { switch (instruction->opcode()) { case HloOpcode::kCall: case HloOpcode::kConstant: - case HloOpcode::kConditional: case HloOpcode::kCrossReplicaSum: case HloOpcode::kCustomCall: case HloOpcode::kParameter: diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 0668f66051..fce135ef61 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -53,83 +53,6 @@ limitations under the License. namespace xla { -// For now moving only one API here, but we should have a single top level -// anonymous namespace, instead of three or four spread all over this file. -namespace { - -// Creates and returns a copy of the given instruction with a different -// layout. Tuple-shaped instructions will be deep-copied, and the last Tuple -// instruction producing the copy is returned. -StatusOr CreateCopyWithNewLayout( - const Shape& shape_with_layout, HloInstruction* instruction) { - TF_RET_CHECK(LayoutUtil::HasLayout(shape_with_layout)); - DCHECK(ShapeUtil::Compatible(shape_with_layout, instruction->shape())) - << ShapeUtil::HumanString(shape_with_layout) << " " - << ShapeUtil::HumanString(instruction->shape()) - << " instruction: " << instruction->ToString(); - - if (ShapeUtil::IsTuple(instruction->shape())) { - // Deep-copy tuples. - std::vector element_copies; - for (int64 i = 0; i < ShapeUtil::TupleElementCount(instruction->shape()); - ++i) { - HloInstruction* gte = instruction->parent()->AddInstruction( - HloInstruction::CreateGetTupleElement( - ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction, - i)); - - // Recurse to copy each elements. - TF_ASSIGN_OR_RETURN( - HloInstruction * element_copy, - CreateCopyWithNewLayout( - ShapeUtil::GetSubshape(shape_with_layout, {i}), gte)); - element_copies.push_back(element_copy); - } - // Gather element copies into a tuple with a new Tuple instruction. - HloInstruction* tuple_copy = instruction->parent()->AddInstruction( - HloInstruction::CreateTuple(element_copies)); - LayoutUtil::ClearLayout(tuple_copy->mutable_shape()); - TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( - shape_with_layout, tuple_copy->mutable_shape())); - return tuple_copy; - } else if (ShapeUtil::IsArray(instruction->shape())) { - HloInstruction* copy = - instruction->parent()->AddInstruction(HloInstruction::CreateUnary( - instruction->shape(), HloOpcode::kCopy, instruction)); - LayoutUtil::ClearLayout(copy->mutable_shape()); - TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( - shape_with_layout, copy->mutable_shape())); - - return copy; - } else { - return FailedPrecondition( - "Can only copy array and tuple shaped instructions"); - } -} - -// Creates a copy of the given operand if the operand's layout does not match -// the given layout. This copy replaces the use in the given instruction. Tuple -// operands will be deep-copied. -Status CopyOperandIfLayoutsDiffer(const ShapeLayout& operand_layout, - HloInstruction* instruction, - int64 operand_no) { - HloInstruction* operand = instruction->mutable_operand(operand_no); - TF_RET_CHECK(operand_layout.LayoutIsSet()); - TF_RET_CHECK(LayoutUtil::HasLayout(operand->shape())); - - if (ShapeUtil::Equal(operand_layout.shape(), operand->shape())) { - // Operand layout already matches our constraint. Nothing to do. - return Status::OK(); - } - - TF_ASSIGN_OR_RETURN(HloInstruction * operand_copy, - CreateCopyWithNewLayout(operand_layout.shape(), operand)); - - return instruction->ReplaceOperandWith(operand_no, operand_copy); -} - -} // namespace - std::ostream& operator<<(std::ostream& out, const LayoutConstraint& constraint) { out << constraint.ToString(); @@ -589,36 +512,6 @@ Status LayoutAssignment::AddMandatoryConstraints( body_layout.result_shape(), instruction)); TF_RETURN_IF_ERROR(constraints->SetOperandLayout( body_layout.result_shape(), instruction, 0)); - } else if (instruction->opcode() == HloOpcode::kConditional) { - // The layout of the true and false computations must match, and must - // be the layout of the kConditional instruction. - TF_RET_CHECK(instruction->operand_count() == 3); - - HloComputation* true_computation = instruction->true_computation(); - HloComputation* false_computation = instruction->false_computation(); - const HloInstruction* true_operand = instruction->operand(1); - const HloInstruction* false_operand = instruction->operand(2); - - TF_RET_CHECK(true_computation->num_parameters() == 1); - TF_RET_CHECK(false_computation->num_parameters() == 1); - ComputationLayout& true_computation_layout = - FindOrDie(computation_layouts_, true_computation); - ComputationLayout& false_computation_layout = - FindOrDie(computation_layouts_, false_computation); - - DCHECK(ShapeUtil::Compatible(true_operand->shape(), - true_computation_layout.parameter_shape(0))); - DCHECK(ShapeUtil::Compatible( - false_operand->shape(), false_computation_layout.parameter_shape(0))); - - TF_RETURN_IF_ERROR(constraints->SetInstructionLayout( - true_computation_layout.result_shape(), instruction)); - TF_RETURN_IF_ERROR(constraints->SetOperandLayout( - true_computation_layout.parameter_shape(0), instruction, 1, - /*mandatory=*/true)); - TF_RETURN_IF_ERROR(constraints->SetOperandLayout( - false_computation_layout.parameter_shape(0), instruction, 2, - /*mandatory=*/true)); } else if (instruction->opcode() == HloOpcode::kCustomCall) { if (!CustomCallRequiresMajorFirstLayout(instruction)) { continue; @@ -705,33 +598,6 @@ Status CheckWhileLayout(HloInstruction* while_inst, return Status::OK(); } -Status CheckConditionalLayout( - HloInstruction* instruction, - const ComputationLayout& true_computation_layout, - const ComputationLayout& false_computation_layout) { - HloComputation* true_computation = instruction->true_computation(); - HloComputation* false_computation = instruction->false_computation(); - const HloInstruction* true_operand = instruction->operand(1); - const HloInstruction* false_operand = instruction->operand(2); - - TF_RET_CHECK(true_computation_layout.result_layout() == - false_computation_layout.result_layout()); - TF_RET_CHECK(true_computation_layout.result_layout().MatchesLayoutInShape( - instruction->shape())); - TF_RET_CHECK(true_computation_layout.result_layout().MatchesLayoutInShape( - true_computation->root_instruction()->shape())); - TF_RET_CHECK(false_computation_layout.result_layout().MatchesLayoutInShape( - instruction->shape())); - TF_RET_CHECK(false_computation_layout.result_layout().MatchesLayoutInShape( - false_computation->root_instruction()->shape())); - TF_RET_CHECK(true_computation_layout.parameter_layout(0).MatchesLayoutInShape( - true_operand->shape())); - TF_RET_CHECK( - false_computation_layout.parameter_layout(0).MatchesLayoutInShape( - false_operand->shape())); - return Status::OK(); -} - // Fusion parameters must match the layout of the fusion instructions operands, // and the root of the fusion expression must match the layout of the fusion // instruction. @@ -844,13 +710,6 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { FindOrDie(computation_layouts_, instruction->while_condition()), FindOrDie(computation_layouts_, instruction->while_body()))); break; - case HloOpcode::kConditional: - TF_RETURN_IF_ERROR(CheckConditionalLayout( - instruction, - FindOrDie(computation_layouts_, instruction->true_computation()), - FindOrDie(computation_layouts_, - instruction->false_computation()))); - break; default: break; } @@ -1306,6 +1165,77 @@ StatusOr InferArrayLayout( return *first_buffer_layout; } +// Creates and returns a copy of the given instruction with a different +// layout. Tuple-shaped instructions will be deep-copied, and the last Tuple +// instruction producing the copy is returned. +StatusOr CreateCopyWithNewLayout( + const Shape& shape_with_layout, HloInstruction* instruction) { + TF_RET_CHECK(LayoutUtil::HasLayout(shape_with_layout)); + DCHECK(ShapeUtil::Compatible(shape_with_layout, instruction->shape())) + << ShapeUtil::HumanString(shape_with_layout) << " " + << ShapeUtil::HumanString(instruction->shape()) + << " instruction: " << instruction->ToString(); + + if (ShapeUtil::IsTuple(instruction->shape())) { + // Deep-copy tuples. + std::vector element_copies; + for (int64 i = 0; i < ShapeUtil::TupleElementCount(instruction->shape()); + ++i) { + HloInstruction* gte = instruction->parent()->AddInstruction( + HloInstruction::CreateGetTupleElement( + ShapeUtil::GetSubshape(instruction->shape(), {i}), instruction, + i)); + + // Recurse to copy each elements. + TF_ASSIGN_OR_RETURN( + HloInstruction * element_copy, + CreateCopyWithNewLayout( + ShapeUtil::GetSubshape(shape_with_layout, {i}), gte)); + element_copies.push_back(element_copy); + } + // Gather element copies into a tuple with a new Tuple instruction. + HloInstruction* tuple_copy = instruction->parent()->AddInstruction( + HloInstruction::CreateTuple(element_copies)); + LayoutUtil::ClearLayout(tuple_copy->mutable_shape()); + TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( + shape_with_layout, tuple_copy->mutable_shape())); + return tuple_copy; + } else if (ShapeUtil::IsArray(instruction->shape())) { + HloInstruction* copy = + instruction->parent()->AddInstruction(HloInstruction::CreateUnary( + instruction->shape(), HloOpcode::kCopy, instruction)); + LayoutUtil::ClearLayout(copy->mutable_shape()); + TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( + shape_with_layout, copy->mutable_shape())); + + return copy; + } else { + return FailedPrecondition( + "Can only copy array and tuple shaped instructions"); + } +} + +// Creates a copy of the given operand if the operand's layout does not match +// the given layout. This copy replaces the use in the given instruction. Tuple +// operands will be deep-copied. +Status CopyOperandIfLayoutsDiffer(const ShapeLayout& operand_layout, + HloInstruction* instruction, + int64 operand_no) { + HloInstruction* operand = instruction->mutable_operand(operand_no); + TF_RET_CHECK(operand_layout.LayoutIsSet()); + TF_RET_CHECK(LayoutUtil::HasLayout(operand->shape())); + + if (ShapeUtil::Equal(operand_layout.shape(), operand->shape())) { + // Operand layout already matches our constraint. Nothing to do. + return Status::OK(); + } + + TF_ASSIGN_OR_RETURN(HloInstruction * operand_copy, + CreateCopyWithNewLayout(operand_layout.shape(), operand)); + + return instruction->ReplaceOperandWith(operand_no, operand_copy); +} + // For fusion instructions, set the layout of each fused parameter instruction // to match the layout of its corresponding fusion instruction operand. Also, // set the layout of the fused root to match the layout of the fusion diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index dd0fba2758..e269a13459 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -658,68 +658,5 @@ TEST_F(LayoutAssignmentTest, GTEInheritsLayoutFromOperand) { ElementsAre(2, 1, 0)); } -TEST_F(LayoutAssignmentTest, ConditionalAsymmetricLayout) { - auto builder = HloComputation::Builder(TestName()); - auto module = CreateNewModule(); - Shape shape = ShapeUtil::MakeShape(F32, {128, 8}); - Shape tshape = ShapeUtil::MakeTupleShape({shape, shape}); - Shape result_tshape = ShapeUtil::MakeTupleShape({shape}); - - auto param0 = builder.AddInstruction( - HloInstruction::CreateParameter(0, shape, "param0")); - auto param1 = builder.AddInstruction( - HloInstruction::CreateParameter(1, shape, "param1")); - auto pred = builder.AddInstruction(HloInstruction::CreateParameter( - 2, ShapeUtil::MakeShape(PRED, {}), "param2")); - auto tuple = - builder.AddInstruction(HloInstruction::CreateTuple({param0, param1})); - - auto true_builder = HloComputation::Builder(TestName() + "_TrueBranch"); - { - auto param = true_builder.AddInstruction( - HloInstruction::CreateParameter(0, tshape, "param")); - auto gte0 = true_builder.AddInstruction( - HloInstruction::CreateGetTupleElement(shape, param, 0)); - auto gte1 = true_builder.AddInstruction( - HloInstruction::CreateGetTupleElement(shape, param, 1)); - auto add = true_builder.AddInstruction( - HloInstruction::CreateBinary(shape, HloOpcode::kAdd, gte0, gte1)); - true_builder.AddInstruction(HloInstruction::CreateTuple({add})); - } - HloComputation* true_computation = - module->AddEmbeddedComputation(true_builder.Build()); - - auto false_builder = HloComputation::Builder(TestName() + "_FalseBranch"); - { - Shape xshape = ShapeUtil::MakeShapeWithLayout(F32, {128, 8}, {0, 1}); - false_builder.AddInstruction( - HloInstruction::CreateParameter(0, tshape, "param")); - // Using infeed as layout assignment does not mess up with it. - auto infeed = - false_builder.AddInstruction(HloInstruction::CreateInfeed(xshape, "")); - false_builder.AddInstruction(HloInstruction::CreateTuple({infeed})); - } - HloComputation* false_computation = - module->AddEmbeddedComputation(false_builder.Build()); - builder.AddInstruction(HloInstruction::CreateConditional( - result_tshape, pred, tuple, true_computation, tuple, false_computation)); - - HloComputation* computation = module->AddEntryComputation(builder.Build()); - ComputationLayout computation_layout(computation->ComputeProgramShape()); - - AssignLayouts(module.get(), &computation_layout); - - const HloInstruction* true_root = true_computation->root_instruction(); - const HloInstruction* false_root = false_computation->root_instruction(); - EXPECT_THAT(true_root->opcode(), HloOpcode::kTuple); - EXPECT_THAT(false_root->opcode(), HloOpcode::kTuple); - - const HloInstruction* true_result = true_root->operand(0); - const HloInstruction* false_result = false_root->operand(0); - EXPECT_TRUE(LayoutUtil::Equal(true_result->shape().layout(), - false_result->shape().layout())); - EXPECT_THAT(false_result->opcode(), HloOpcode::kCopy); -} - } // namespace } // namespace xla -- GitLab From e23948da81a007b27869a75c4a7dbe8f91ea8c03 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Tue, 13 Feb 2018 19:16:08 -0800 Subject: [PATCH 157/629] For models running in Eager mode, do not update the weights of the BatchNorm layer if the layer's trainable argument is False. This change is required in Eager mode to freeze a layer's weights when we set the layer's trainable attribute to False. This should not be confused with the "training" attribute which refers to a model's training or inference mode behavior. PiperOrigin-RevId: 185625661 --- tensorflow/python/layers/normalization.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 656d566ab5..323a9f8ee3 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -493,6 +493,7 @@ class BatchNormalization(base.Layer): return (r, d, new_mean, new_variance) def call(self, inputs, training=False): + in_eager_mode = context.in_eager_mode() if self.virtual_batch_size is not None: # Virtual batches (aka ghost batches) can be simulated by reshaping the # Tensor and reusing the existing batch norm implementation @@ -595,6 +596,9 @@ class BatchNormalization(base.Layer): axis=1, keep_dims=True) def _do_update(var, value): + if in_eager_mode and not self.trainable: + return + return moving_averages.assign_moving_average( var, value, self.momentum, zero_debias=False) -- GitLab From 95fae75e4d15c59a43d8eaf150b8c32c7c6d1495 Mon Sep 17 00:00:00 2001 From: Clayne Robison Date: Tue, 13 Feb 2018 20:07:42 -0800 Subject: [PATCH 158/629] Fixing MklCPUAllocator error introduced by commit #1baac7862739525351d25202800dc04e8ec3868b --- tensorflow/core/common_runtime/mkl_cpu_allocator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 0eb47f4e56..2a67c039ac 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -25,7 +25,7 @@ limitations under the License. #include #include #include "tensorflow/core/common_runtime/bfc_allocator.h" -#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/common_runtime/visitable_allocator.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" @@ -161,7 +161,7 @@ class MklCPUAllocator : public VisitableAllocator { /// The alignment that we need for the allocations static const size_t kAlignment = 64; - Allocator* allocator_; // owned by this class + VisitableAllocator* allocator_; // owned by this class }; } // namespace tensorflow -- GitLab From 5ba36396388a7e241e25133e7aa312b9f1c6a74c Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 13 Feb 2018 20:39:33 -0800 Subject: [PATCH 159/629] Fix tf.keras progbar display. PiperOrigin-RevId: 185632155 --- .../python/keras/_impl/keras/callbacks.py | 2 +- .../keras/_impl/keras/utils/generic_utils.py | 112 ++++++++++-------- .../tensorflow.keras.utils.-progbar.pbtxt | 4 +- 3 files changed, 66 insertions(+), 52 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py b/tensorflow/python/keras/_impl/keras/callbacks.py index b29bc39232..de013c7c3f 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks.py +++ b/tensorflow/python/keras/_impl/keras/callbacks.py @@ -328,7 +328,7 @@ class ProgbarLogger(Callback): if k in logs: self.log_values.append((k, logs[k])) if self.verbose: - self.progbar.update(self.seen, self.log_values, force=True) + self.progbar.update(self.seen, self.log_values) @tf_export('keras.callbacks.History') diff --git a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py index fa3b55c59e..462d600bf8 100644 --- a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py @@ -291,55 +291,73 @@ class Progbar(object): Arguments: target: Total number of steps expected, None if unknown. + width: Progress bar width on screen. + verbose: Verbosity mode, 0 (silent), 1 (verbose), 2 (semi-verbose) + stateful_metrics: Iterable of string names of metrics that + should *not* be averaged over time. Metrics in this list + will be displayed as-is. All others will be averaged + by the progbar before display. interval: Minimum visual progress update interval (in seconds). """ - def __init__(self, target, width=30, verbose=1, interval=0.05): - self.width = width - if target is None: - target = -1 + def __init__(self, target, width=30, verbose=1, interval=0.05, + stateful_metrics=None): self.target = target - self.sum_values = {} - self.unique_values = [] - self.start = time.time() - self.last_update = 0 - self.interval = interval - self.total_width = 0 - self.seen_so_far = 0 + self.width = width self.verbose = verbose + self.interval = interval + if stateful_metrics: + self.stateful_metrics = set(stateful_metrics) + else: + self.stateful_metrics = set() + self._dynamic_display = ((hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()) or - 'ipykernel' in sys.modules) - - def update(self, current, values=None, force=False): + 'ipykernel' in sys.modules or + 'posix' in sys.modules) + self._total_width = 0 + self._seen_so_far = 0 + # We use a dict + list to avoid garbage collection + # issues found in OrderedDict + self._values = {} + self._values_order = [] + self._start = time.time() + self._last_update = 0 + + def update(self, current, values=None): """Updates the progress bar. Arguments: current: Index of current step. - values: List of tuples (name, value_for_last_step). - The progress bar will display averages for these values. - force: Whether to force visual progress update. + values: List of tuples: + `(name, value_for_last_step)`. + If `name` is in `stateful_metrics`, + `value_for_last_step` will be displayed as-is. + Else, an average of the metric over time will be displayed. """ values = values or [] for k, v in values: - if k not in self.sum_values: - self.sum_values[k] = [ - v * (current - self.seen_so_far), current - self.seen_so_far - ] - self.unique_values.append(k) + if k not in self._values_order: + self._values_order.append(k) + if k not in self.stateful_metrics: + if k not in self._values: + self._values[k] = [v * (current - self._seen_so_far), + current - self._seen_so_far] + else: + self._values[k][0] += v * (current - self._seen_so_far) + self._values[k][1] += (current - self._seen_so_far) else: - self.sum_values[k][0] += v * (current - self.seen_so_far) - self.sum_values[k][1] += (current - self.seen_so_far) - self.seen_so_far = current + self._values[k] = v + self._seen_so_far = current now = time.time() - info = ' - %.0fs' % (now - self.start) + info = ' - %.0fs' % (now - self._start) if self.verbose == 1: - if (not force and (now - self.last_update) < self.interval and - current < self.target): + if (now - self._last_update < self.interval and + self.target is not None and current < self.target): return - prev_total_width = self.total_width + prev_total_width = self._total_width if self._dynamic_display: sys.stdout.write('\b' * prev_total_width) sys.stdout.write('\r') @@ -360,22 +378,21 @@ class Progbar(object): bar += '=' bar += ('.' * (self.width - prog_width)) bar += ']' - sys.stdout.write(bar) - self.total_width = len(bar) else: bar = '%7d/Unknown' % current - self.total_width = len(bar) + self._total_width = len(bar) sys.stdout.write(bar) if current: - time_per_unit = (now - self.start) / current + time_per_unit = (now - self._start) / current else: time_per_unit = 0 if self.target is not None and current < self.target: eta = time_per_unit * (self.target - current) if eta > 3600: - eta_format = '%d:%02d:%02d' % (eta // 3600, (eta % 3600) // 60, + eta_format = '%d:%02d:%02d' % (eta // 3600, + (eta % 3600) // 60, eta % 60) elif eta > 60: eta_format = '%d:%02d' % (eta // 60, eta % 60) @@ -391,35 +408,32 @@ class Progbar(object): else: info += ' %.0fus/step' % (time_per_unit * 1e6) - for k in self.unique_values: + for k in self._values_order: info += ' - %s:' % k - if isinstance(self.sum_values[k], list): - avg = np.mean(self.sum_values[k][0] / max(1, self.sum_values[k][1])) + if isinstance(self._values[k], list): + avg = np.mean(self._values[k][0] / max(1, self._values[k][1])) if abs(avg) > 1e-3: info += ' %.4f' % avg else: info += ' %.4e' % avg else: - info += ' %s' % self.sum_values[k] + info += ' %s' % self._values[k] + + self._total_width += len(info) + if prev_total_width > self._total_width: + info += (' ' * (prev_total_width - self._total_width)) - self.total_width += len(info) - if prev_total_width > self.total_width: - info += (' ' * (prev_total_width - self.total_width)) if self.target is not None and current >= self.target: info += '\n' sys.stdout.write(info) sys.stdout.flush() - if current >= self.target: - sys.stdout.write('\n') - elif self.verbose == 2: if self.target is None or current >= self.target: - for k in self.unique_values: + for k in self._values_order: info += ' - %s:' % k - avg = np.mean( - self.sum_values[k][0] / max(1, self.sum_values[k][1])) + avg = np.mean(self._values[k][0] / max(1, self._values[k][1])) if avg > 1e-3: info += ' %.4f' % avg else: @@ -429,10 +443,10 @@ class Progbar(object): sys.stdout.write(info) sys.stdout.flush() - self.last_update = now + self._last_update = now def add(self, n, values=None): - self.update(self.seen_so_far + n, values) + self.update(self._seen_so_far + n, values) def make_batches(size, batch_size): diff --git a/tensorflow/tools/api/golden/tensorflow.keras.utils.-progbar.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.utils.-progbar.pbtxt index 3adc6b6faa..16e1cbe650 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.utils.-progbar.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.utils.-progbar.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'target\', \'width\', \'verbose\', \'interval\'], varargs=None, keywords=None, defaults=[\'30\', \'1\', \'0.05\'], " + argspec: "args=[\'self\', \'target\', \'width\', \'verbose\', \'interval\', \'stateful_metrics\'], varargs=None, keywords=None, defaults=[\'30\', \'1\', \'0.05\', \'None\'], " } member_method { name: "add" @@ -12,6 +12,6 @@ tf_class { } member_method { name: "update" - argspec: "args=[\'self\', \'current\', \'values\', \'force\'], varargs=None, keywords=None, defaults=[\'None\', \'False\'], " + argspec: "args=[\'self\', \'current\', \'values\'], varargs=None, keywords=None, defaults=[\'None\'], " } } -- GitLab From 6aad89bb560e4bbeafff9d0c42cc8983ab4a3499 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Tue, 13 Feb 2018 21:45:08 -0800 Subject: [PATCH 160/629] [TF:XLA] Tag reverse_sequence test as optonly, increase its timeout. PiperOrigin-RevId: 185637431 --- tensorflow/compiler/tests/BUILD | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index b49d2ca961..782bf82d41 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -479,8 +479,9 @@ tf_xla_py_test( tf_xla_py_test( name = "reverse_sequence_op_test", - size = "small", + size = "medium", srcs = ["reverse_sequence_op_test.py"], + tags = ["optonly"], deps = [ ":xla_test", "//tensorflow/python:array_ops", -- GitLab From 14b6365a36c8982092ed2010e1e90f66f663deeb Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Tue, 13 Feb 2018 21:49:28 -0800 Subject: [PATCH 161/629] tfe SPINN example: Add inference; fix serialization * Also de-flake a test. PiperOrigin-RevId: 185637742 --- .../contrib/eager/python/examples/spinn/BUILD | 6 +- .../eager/python/examples/spinn/data.py | 23 +++ .../eager/python/examples/spinn/data_test.py | 51 +++++-- .../eager/python/examples/spinn/spinn_test.py | 87 +++++++++-- third_party/examples/eager/spinn/README.md | 41 ++++++ third_party/examples/eager/spinn/spinn.py | 139 ++++++++++++++---- 6 files changed, 287 insertions(+), 60 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/spinn/BUILD b/tensorflow/contrib/eager/python/examples/spinn/BUILD index 21055cfe11..a1f8a759e2 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/BUILD +++ b/tensorflow/contrib/eager/python/examples/spinn/BUILD @@ -38,9 +38,5 @@ cuda_py_test( "//tensorflow/python:client_testlib", "//tensorflow/python:framework_test_lib", ], - tags = [ - "manual", - "no_gpu", - "no_pip", # because spinn.py is under third_party/. - ], + tags = ["no_pip"], # because spinn.py is under third_party/. ) diff --git a/tensorflow/contrib/eager/python/examples/spinn/data.py b/tensorflow/contrib/eager/python/examples/spinn/data.py index fcaae0a4f8..3bc3bb49bc 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/data.py +++ b/tensorflow/contrib/eager/python/examples/spinn/data.py @@ -227,6 +227,29 @@ def calculate_bins(length2count, min_bin_size): return bounds +def encode_sentence(sentence, word2index): + """Encode a single sentence as word indices and shift-reduce code. + + Args: + sentence: The sentence with added binary parse information, represented as + a string, with all the word items and parentheses separated by spaces. + E.g., '( ( The dog ) ( ( is ( playing toys ) ) . ) )'. + word2index: A `dict` mapping words to their word indices. + + Returns: + 1. Word indices as a numpy array, with shape `(sequence_len, 1)`. + 2. Shift-reduce sequence as a numpy array, with shape + `(sequence_len * 2 - 3, 1)`. + """ + items = [w for w in sentence.split(" ") if w] + words = get_non_parenthesis_words(items) + shift_reduce = get_shift_reduce(items) + word_indices = pad_and_reverse_word_ids( + [[word2index.get(word, UNK_CODE) for word in words]]).T + return (word_indices, + np.expand_dims(np.array(shift_reduce, dtype=np.int64), -1)) + + class SnliData(object): """A split of SNLI data.""" diff --git a/tensorflow/contrib/eager/python/examples/spinn/data_test.py b/tensorflow/contrib/eager/python/examples/spinn/data_test.py index e4f0b37c50..54fef2c3fe 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/data_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/data_test.py @@ -22,6 +22,7 @@ import os import shutil import tempfile +import numpy as np import tensorflow as tf from tensorflow.contrib.eager.python.examples.spinn import data @@ -173,14 +174,9 @@ class DataTest(tf.test.TestCase): ValueError, "Cannot find GloVe embedding file at"): data.load_word_vectors(self._temp_data_dir, vocab) - def testSnliData(self): - """Unit test for SnliData objects.""" - snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0") - fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt") - os.makedirs(snli_1_0_dir) - + def _createFakeSnliData(self, fake_snli_file): # Four sentences in total. - with open(fake_train_file, "wt") as f: + with open(fake_snli_file, "wt") as f: f.write("gold_label\tsentence1_binary_parse\tsentence2_binary_parse\t" "sentence1_parse\tsentence2_parse\tsentence1\tsentence2\t" "captionID\tpairID\tlabel1\tlabel2\tlabel3\tlabel4\tlabel5\n") @@ -205,10 +201,7 @@ class DataTest(tf.test.TestCase): "4705552913.jpg#2\t4705552913.jpg#2r1n\t" "neutral\tentailment\tneutral\tneutral\tneutral\n") - glove_dir = os.path.join(self._temp_data_dir, "glove") - os.makedirs(glove_dir) - glove_file = os.path.join(glove_dir, "glove.42B.300d.txt") - + def _createFakeGloveData(self, glove_file): words = [".", "foo", "bar", "baz", "quux", "quuz", "grault", "garply"] with open(glove_file, "wt") as f: for i, word in enumerate(words): @@ -220,6 +213,40 @@ class DataTest(tf.test.TestCase): else: f.write("\n") + def testEncodeSingleSentence(self): + snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0") + fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt") + os.makedirs(snli_1_0_dir) + self._createFakeSnliData(fake_train_file) + vocab = data.load_vocabulary(self._temp_data_dir) + glove_dir = os.path.join(self._temp_data_dir, "glove") + os.makedirs(glove_dir) + glove_file = os.path.join(glove_dir, "glove.42B.300d.txt") + self._createFakeGloveData(glove_file) + word2index, _ = data.load_word_vectors(self._temp_data_dir, vocab) + + sentence_variants = [ + "( Foo ( ( bar baz ) . ) )", + " ( Foo ( ( bar baz ) . ) ) ", + "( Foo ( ( bar baz ) . ) )"] + for sentence in sentence_variants: + word_indices, shift_reduce = data.encode_sentence(sentence, word2index) + self.assertEqual(np.int64, word_indices.dtype) + self.assertEqual((5, 1), word_indices.shape) + self.assertAllClose( + np.array([[3, 3, 3, 2, 3, 2, 2]], dtype=np.int64).T, shift_reduce) + + def testSnliData(self): + snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0") + fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt") + os.makedirs(snli_1_0_dir) + self._createFakeSnliData(fake_train_file) + + glove_dir = os.path.join(self._temp_data_dir, "glove") + os.makedirs(glove_dir) + glove_file = os.path.join(glove_dir, "glove.42B.300d.txt") + self._createFakeGloveData(glove_file) + vocab = data.load_vocabulary(self._temp_data_dir) word2index, _ = data.load_word_vectors(self._temp_data_dir, vocab) @@ -230,7 +257,7 @@ class DataTest(tf.test.TestCase): self.assertEqual(1, train_data.num_batches(4)) generator = train_data.get_generator(2)() - for i in range(2): + for _ in range(2): label, prem, prem_trans, hypo, hypo_trans = next(generator) self.assertEqual(2, len(label)) self.assertEqual((4, 2), prem.shape) diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index 7b2f09cba1..eefc06d90d 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -36,6 +36,7 @@ from third_party.examples.eager.spinn import spinn from tensorflow.contrib.summary import summary_test_util from tensorflow.python.eager import test from tensorflow.python.framework import test_util +from tensorflow.python.training import checkpoint_utils # pylint: enable=g-bad-import-order @@ -66,13 +67,30 @@ def _generate_synthetic_snli_data_batch(sequence_length, return labels, prem, prem_trans, hypo, hypo_trans -def _test_spinn_config(d_embed, d_out, logdir=None): +def _test_spinn_config(d_embed, d_out, logdir=None, inference_sentences=None): + """Generate a config tuple for testing. + + Args: + d_embed: Embedding dimensions. + d_out: Model output dimensions. + logdir: Optional logdir. + inference_sentences: A 2-tuple of strings representing the sentences (with + binary parsing result), e.g., + ("( ( The dog ) ( ( is running ) . ) )", "( ( The dog ) ( moves . ) )"). + + Returns: + A config tuple. + """ config_tuple = collections.namedtuple( "Config", ["d_hidden", "d_proj", "d_tracker", "predict", "embed_dropout", "mlp_dropout", "n_mlp_layers", "d_mlp", "d_out", "projection", "lr", "batch_size", "epochs", "force_cpu", "logdir", "log_every", "dev_every", "save_every", - "lr_decay_every", "lr_decay_by"]) + "lr_decay_every", "lr_decay_by", "inference_premise", + "inference_hypothesis"]) + + inference_premise = inference_sentences[0] if inference_sentences else None + inference_hypothesis = inference_sentences[1] if inference_sentences else None return config_tuple( d_hidden=d_embed, d_proj=d_embed * 2, @@ -86,14 +104,16 @@ def _test_spinn_config(d_embed, d_out, logdir=None): projection=True, lr=2e-2, batch_size=2, - epochs=10, + epochs=20, force_cpu=False, logdir=logdir, log_every=1, dev_every=2, save_every=2, lr_decay_every=1, - lr_decay_by=0.75) + lr_decay_by=0.75, + inference_premise=inference_premise, + inference_hypothesis=inference_hypothesis) class SpinnTest(test_util.TensorFlowTestCase): @@ -288,11 +308,7 @@ class SpinnTest(test_util.TensorFlowTestCase): # Training on the batch should have led to a change in the loss value. self.assertNotEqual(loss1.numpy(), loss2.numpy()) - def testTrainSpinn(self): - """Test with fake toy SNLI data and GloVe vectors.""" - - # 1. Create and load a fake SNLI data file and a fake GloVe embedding file. - snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0") + def _create_test_data(self, snli_1_0_dir): fake_train_file = os.path.join(snli_1_0_dir, "snli_1.0_train.txt") os.makedirs(snli_1_0_dir) @@ -337,13 +353,52 @@ class SpinnTest(test_util.TensorFlowTestCase): else: f.write("\n") + return fake_train_file + + def testInferSpinnWorks(self): + """Test inference with the spinn model.""" + snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0") + self._create_test_data(snli_1_0_dir) + + vocab = data.load_vocabulary(self._temp_data_dir) + word2index, embed = data.load_word_vectors(self._temp_data_dir, vocab) + + config = _test_spinn_config( + data.WORD_VECTOR_LEN, 4, + logdir=os.path.join(self._temp_data_dir, "logdir"), + inference_sentences=("( foo ( bar . ) )", "( bar ( foo . ) )")) + logits = spinn.train_or_infer_spinn( + embed, word2index, None, None, None, config) + self.assertEqual(np.float32, logits.dtype) + self.assertEqual((3,), logits.shape) + + def testInferSpinnThrowsErrorIfOnlyOneSentenceIsSpecified(self): + snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0") + self._create_test_data(snli_1_0_dir) + + vocab = data.load_vocabulary(self._temp_data_dir) + word2index, embed = data.load_word_vectors(self._temp_data_dir, vocab) + + config = _test_spinn_config( + data.WORD_VECTOR_LEN, 4, + logdir=os.path.join(self._temp_data_dir, "logdir"), + inference_sentences=("( foo ( bar . ) )", None)) + with self.assertRaises(ValueError): + spinn.train_or_infer_spinn(embed, word2index, None, None, None, config) + + def testTrainSpinn(self): + """Test with fake toy SNLI data and GloVe vectors.""" + + # 1. Create and load a fake SNLI data file and a fake GloVe embedding file. + snli_1_0_dir = os.path.join(self._temp_data_dir, "snli/snli_1.0") + fake_train_file = self._create_test_data(snli_1_0_dir) + vocab = data.load_vocabulary(self._temp_data_dir) word2index, embed = data.load_word_vectors(self._temp_data_dir, vocab) train_data = data.SnliData(fake_train_file, word2index) dev_data = data.SnliData(fake_train_file, word2index) test_data = data.SnliData(fake_train_file, word2index) - print(embed) # 2. Create a fake config. config = _test_spinn_config( @@ -351,7 +406,8 @@ class SpinnTest(test_util.TensorFlowTestCase): logdir=os.path.join(self._temp_data_dir, "logdir")) # 3. Test training of a SPINN model. - spinn.train_spinn(embed, train_data, dev_data, test_data, config) + trainer = spinn.train_or_infer_spinn( + embed, word2index, train_data, dev_data, test_data, config) # 4. Load train loss values from the summary files and verify that they # decrease with training. @@ -363,6 +419,15 @@ class SpinnTest(test_util.TensorFlowTestCase): self.assertEqual(config.epochs, len(train_losses)) self.assertLess(train_losses[-1], train_losses[0]) + # 5. Verify that checkpoints exist and contains all the expected variables. + self.assertTrue(glob.glob(os.path.join(config.logdir, "ckpt*"))) + ckpt_variable_names = [ + item[0] for item in checkpoint_utils.list_variables(config.logdir)] + self.assertIn("global_step", ckpt_variable_names) + for v in trainer.variables: + variable_name = v.name[:v.name.index(":")] if ":" in v.name else v.name + self.assertIn(variable_name, ckpt_variable_names) + class EagerSpinnSNLIClassifierBenchmark(test.Benchmark): diff --git a/third_party/examples/eager/spinn/README.md b/third_party/examples/eager/spinn/README.md index 6bd3d53e56..335c0fa3b5 100644 --- a/third_party/examples/eager/spinn/README.md +++ b/third_party/examples/eager/spinn/README.md @@ -66,3 +66,44 @@ Other eager execution examples can be found under [tensorflow/contrib/eager/pyth ```bash tensorboard --logdir /tmp/spinn-logs ``` + +- After training, you may use the model to perform inference on input data in + the SNLI data format. The premise and hypotheses sentences are specified with + the command-line flags `--inference_premise` and `--inference_hypothesis`, + respecitvely. Each sentence should include the words, as well as parentheses + representing a binary parsing of the sentence. The words and parentheses + should all be separated by spaces. For instance, + + ```bash + pythons spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs \ + --inference_premise '( ( The dog ) ( ( is running ) . ) )' \ + --inference_hypothesis '( ( The dog ) ( moves . ) )' + ``` + + which will generate an output like the following, due to the semantic + consistency of the two sentences. + + ```none + Inference logits: + entailment: 1.101249 (winner) + contradiction: -2.374171 + neutral: -0.296733 + ``` + + By contrast, the following sentence pair: + + ```bash + pythons spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs \ + --inference_premise '( ( The dog ) ( ( is running ) . ) )' \ + --inference_hypothesis '( ( The dog ) ( rests . ) )' + ``` + + will give you an output like the following, due to the semantic + contradiction of the two sentences. + + ```none + Inference logits: + entailment: -1.070098 + contradiction: 2.798695 (winner) + neutral: -1.402287 + ``` diff --git a/third_party/examples/eager/spinn/spinn.py b/third_party/examples/eager/spinn/spinn.py index a2fa18eeb1..38ba48d501 100644 --- a/third_party/examples/eager/spinn/spinn.py +++ b/third_party/examples/eager/spinn/spinn.py @@ -44,6 +44,7 @@ import os import sys import time +import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf @@ -471,6 +472,15 @@ class SNLIClassifierTrainer(object): def learning_rate(self): return self._learning_rate + @property + def model(self): + return self._model + + @property + def variables(self): + return (self._model.variables + [self.learning_rate] + + self._optimizer.variables()) + def _batch_n_correct(logits, label): """Calculate number of correct predictions in a batch. @@ -488,13 +498,12 @@ def _batch_n_correct(logits, label): tf.argmax(logits, axis=1), label)), tf.float32)).numpy() -def _evaluate_on_dataset(snli_data, batch_size, model, trainer, use_gpu): +def _evaluate_on_dataset(snli_data, batch_size, trainer, use_gpu): """Run evaluation on a dataset. Args: snli_data: The `data.SnliData` to use in this evaluation. batch_size: The batch size to use during this evaluation. - model: An instance of `SNLIClassifier` to evaluate. trainer: An instance of `SNLIClassifierTrainer to use for this evaluation. use_gpu: Whether GPU is being used. @@ -509,7 +518,7 @@ def _evaluate_on_dataset(snli_data, batch_size, model, trainer, use_gpu): snli_data, batch_size): if use_gpu: label, prem, hypo = label.gpu(), prem.gpu(), hypo.gpu() - logits = model(prem, prem_trans, hypo, hypo_trans, training=False) + logits = trainer.model(prem, prem_trans, hypo, hypo_trans, training=False) loss_val = trainer.loss(label, logits) batch_size = tf.shape(label)[0] mean_loss(loss_val, weights=batch_size.gpu() if use_gpu else batch_size) @@ -536,13 +545,19 @@ def _get_dataset_iterator(snli_data, batch_size): return tfe.Iterator(dataset) -def train_spinn(embed, train_data, dev_data, test_data, config): - """Train a SPINN model. +def train_or_infer_spinn(embed, + word2index, + train_data, + dev_data, + test_data, + config): + """Perform Training or Inference on a SPINN model. Args: embed: The embedding matrix as a float32 numpy array with shape [vocabulary_size, word_vector_len]. word_vector_len is the length of a word embedding vector. + word2index: A `dict` mapping word to word index. train_data: An instance of `data.SnliData`, for the train split. dev_data: Same as above, for the dev split. test_data: Same as above, for the test split. @@ -550,13 +565,35 @@ def train_spinn(embed, train_data, dev_data, test_data, config): details. Returns: - 1. Final loss value on the test split. - 2. Final fraction of correct classifications on the test split. + If `config.inference_premise ` and `config.inference_hypothesis` are not + `None`, i.e., inference mode: the logits for the possible labels of the + SNLI data set, as numpy array of three floats. + else: + The trainer object. + Raises: + ValueError: if only one of config.inference_premise and + config.inference_hypothesis is specified. """ + # TODO(cais): Refactor this function into separate one for training and + # inference. use_gpu = tfe.num_gpus() > 0 and not config.force_cpu device = "gpu:0" if use_gpu else "cpu:0" print("Using device: %s" % device) + if ((config.inference_premise and not config.inference_hypothesis) or + (not config.inference_premise and config.inference_hypothesis)): + raise ValueError( + "--inference_premise and --inference_hypothesis must be both " + "specified or both unspecified, but only one is specified.") + + if config.inference_premise: + # Inference mode. + inference_sentence_pair = [ + data.encode_sentence(config.inference_premise, word2index), + data.encode_sentence(config.inference_hypothesis, word2index)] + else: + inference_sentence_pair = None + log_header = ( " Time Epoch Iteration Progress (%Epoch) Loss Dev/Loss" " Accuracy Dev/Accuracy") @@ -569,16 +606,36 @@ def train_spinn(embed, train_data, dev_data, test_data, config): summary_writer = tf.contrib.summary.create_file_writer( config.logdir, flush_millis=10000) - train_len = train_data.num_batches(config.batch_size) + with tf.device(device), \ - tfe.restore_variables_on_create( - tf.train.latest_checkpoint(config.logdir)), \ summary_writer.as_default(), \ tf.contrib.summary.always_record_summaries(): - model = SNLIClassifier(config, embed) - global_step = tf.train.get_or_create_global_step() - trainer = SNLIClassifierTrainer(model, config.lr) - + with tfe.restore_variables_on_create( + tf.train.latest_checkpoint(config.logdir)): + model = SNLIClassifier(config, embed) + global_step = tf.train.get_or_create_global_step() + trainer = SNLIClassifierTrainer(model, config.lr) + + if inference_sentence_pair: + # Inference mode. + with tfe.restore_variables_on_create( + tf.train.latest_checkpoint(config.logdir)): + prem, prem_trans = inference_sentence_pair[0] + hypo, hypo_trans = inference_sentence_pair[1] + hypo_trans = inference_sentence_pair[1][1] + inference_logits = model( # pylint: disable=not-callable + tf.constant(prem), tf.constant(prem_trans), + tf.constant(hypo), tf.constant(hypo_trans), training=False) + inference_logits = np.array(inference_logits[0][1:]) + max_index = np.argmax(inference_logits) + print("\nInference logits:") + for i, (label, logit) in enumerate( + zip(data.POSSIBLE_LABELS, inference_logits)): + winner_tag = " (winner)" if max_index == i else "" + print(" {0:<16}{1:.6f}{2}".format(label + ":", logit, winner_tag)) + return inference_logits + + train_len = train_data.num_batches(config.batch_size) start = time.time() iterations = 0 mean_loss = tfe.metrics.Mean() @@ -594,23 +651,24 @@ def train_spinn(embed, train_data, dev_data, test_data, config): # remain on CPU. Same in _evaluate_on_dataset(). iterations += 1 - batch_train_loss, batch_train_logits = trainer.train_batch( - label, prem, prem_trans, hypo, hypo_trans) + with tfe.restore_variables_on_create( + tf.train.latest_checkpoint(config.logdir)): + batch_train_loss, batch_train_logits = trainer.train_batch( + label, prem, prem_trans, hypo, hypo_trans) batch_size = tf.shape(label)[0] mean_loss(batch_train_loss.numpy(), weights=batch_size.gpu() if use_gpu else batch_size) accuracy(tf.argmax(batch_train_logits, axis=1), label) if iterations % config.save_every == 0: - all_variables = ( - model.variables + [trainer.learning_rate] + [global_step]) + all_variables = trainer.variables + [global_step] saver = tfe.Saver(all_variables) saver.save(os.path.join(config.logdir, "ckpt"), global_step=global_step) if iterations % config.dev_every == 0: dev_loss, dev_frac_correct = _evaluate_on_dataset( - dev_data, config.batch_size, model, trainer, use_gpu) + dev_data, config.batch_size, trainer, use_gpu) print(dev_log_template.format( time.time() - start, epoch, iterations, 1 + batch_idx, train_len, @@ -638,10 +696,12 @@ def train_spinn(embed, train_data, dev_data, test_data, config): trainer.decay_learning_rate(config.lr_decay_by) test_loss, test_frac_correct = _evaluate_on_dataset( - test_data, config.batch_size, model, trainer, use_gpu) + test_data, config.batch_size, trainer, use_gpu) print("Final test loss: %g; accuracy: %g%%" % (test_loss, test_frac_correct * 100.0)) + return trainer + def main(_): config = FLAGS @@ -650,18 +710,24 @@ def main(_): vocab = data.load_vocabulary(FLAGS.data_root) word2index, embed = data.load_word_vectors(FLAGS.data_root, vocab) - print("Loading train, dev and test data...") - train_data = data.SnliData( - os.path.join(FLAGS.data_root, "snli/snli_1.0/snli_1.0_train.txt"), - word2index, sentence_len_limit=FLAGS.sentence_len_limit) - dev_data = data.SnliData( - os.path.join(FLAGS.data_root, "snli/snli_1.0/snli_1.0_dev.txt"), - word2index, sentence_len_limit=FLAGS.sentence_len_limit) - test_data = data.SnliData( - os.path.join(FLAGS.data_root, "snli/snli_1.0/snli_1.0_test.txt"), - word2index, sentence_len_limit=FLAGS.sentence_len_limit) - - train_spinn(embed, train_data, dev_data, test_data, config) + if not (config.inference_premise or config.inference_hypothesis): + print("Loading train, dev and test data...") + train_data = data.SnliData( + os.path.join(FLAGS.data_root, "snli/snli_1.0/snli_1.0_train.txt"), + word2index, sentence_len_limit=FLAGS.sentence_len_limit) + dev_data = data.SnliData( + os.path.join(FLAGS.data_root, "snli/snli_1.0/snli_1.0_dev.txt"), + word2index, sentence_len_limit=FLAGS.sentence_len_limit) + test_data = data.SnliData( + os.path.join(FLAGS.data_root, "snli/snli_1.0/snli_1.0_test.txt"), + word2index, sentence_len_limit=FLAGS.sentence_len_limit) + else: + train_data = None + dev_data = None + test_data = None + + train_or_infer_spinn( + embed, word2index, train_data, dev_data, test_data, config) if __name__ == "__main__": @@ -678,6 +744,15 @@ if __name__ == "__main__": parser.add_argument("--logdir", type=str, default="/tmp/spinn-logs", help="Directory in which summaries will be written for " "TensorBoard.") + parser.add_argument("--inference_premise", type=str, default=None, + help="Premise sentence for inference. Must be " + "accompanied by --inference_hypothesis. If specified, " + "will override all training parameters and perform " + "inference.") + parser.add_argument("--inference_hypothesis", type=str, default=None, + help="Hypothesis sentence for inference. Must be " + "accompanied by --inference_premise. If specified, will " + "override all training parameters and perform inference.") parser.add_argument("--epochs", type=int, default=50, help="Number of epochs to train.") parser.add_argument("--batch_size", type=int, default=128, -- GitLab From ca19b32e4d1574ad29e36dbc164c320aeca80d47 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Wed, 14 Feb 2018 00:13:00 -0800 Subject: [PATCH 162/629] cifar 10 divergance fix and batchnorm unit test fix --- .../core/kernels/mkl_fused_batch_norm_op.cc | 96 +++++++++++++------ tensorflow/core/kernels/mkl_relu_op.cc | 20 +++- 2 files changed, 81 insertions(+), 35 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 8313224d7f..b7dee3fb3e 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -1110,19 +1110,12 @@ class MklFusedBatchNormGradOp : public OpKernel { return; } - if (dnn_shape_src.IsMklTensor()) - depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C); - else - ExtractParams(context); - - memory::format format_m; if (dnn_shape_src.IsMklTensor()) { - if (dnn_shape_src.IsTensorInNCHWFormat()) - format_m = memory::format::nchw; - else - format_m = memory::format::nhwc; + depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C); + } else if (dnn_shape_diff_dst.IsMklTensor()) { + depth_ = dnn_shape_diff_dst.DimSize(MklDnnDims::Dim_C); } else { - format_m = TFDataFormatToMklDnnDataFormat(tensor_format_); + ExtractParams(context); } MklDnnData src(&cpu_engine); @@ -1146,20 +1139,20 @@ class MklFusedBatchNormGradOp : public OpKernel { diff_dst_dims = TFShapeToMklDnnDimsInNCHW(diff_dst_tensor.shape(), tensor_format_); - // set src and diff_dst primitives + // set src and diff_dst primitives according to input layout memory::desc src_md({}, memory::data_undef, memory::format_undef); memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef); - if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) { - if (dnn_shape_src.IsMklTensor()) { - src_md = dnn_shape_src.GetMklLayout(); - diff_dst_md = src_md; - } else { - diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); - src_md = diff_dst_md; - } + if (dnn_shape_src.IsMklTensor()) { + src_md = dnn_shape_src.GetMklLayout(); } else { - src_md = memory::desc(src_dims, MklDnnType(), format_m); - diff_dst_md = src_md; + src_md = memory::desc(src_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); + } + if (dnn_shape_diff_dst.IsMklTensor()) { + diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); + } else { + diff_dst_md = memory::desc(diff_dst_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); } src.SetUsrMem(src_md, &src_tensor); diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); @@ -1211,28 +1204,64 @@ class MklFusedBatchNormGradOp : public OpKernel { // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor()) { + + // MKL-DNN's BN primitive not provide API to fetch internal format + // set common_md as OpMem + // src and diff_dst will reorder to common_md + // diff_src will set as common_md + memory::desc common_md({}, memory::data_undef, memory::format_undef); + if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor()) { + common_md = dnn_shape_src.GetMklLayout(); + } else { + common_md = dnn_shape_diff_dst.GetMklLayout(); + } + } else { + common_md = memory::desc(src_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); + } + // if any of src and diff_dst as mkl layout, + // then we set diff_src as mkl layout + if (dnn_shape_src.IsMklTensor() || + dnn_shape_diff_dst.IsMklTensor()) { dnn_shape_diff_src.SetMklTensor(true); - auto diff_src_pd = bnrm_fwd_pd.dst_primitive_desc(); + // set diff_src's mkl layout as common_md + auto diff_src_pd = memory::primitive_desc(common_md, cpu_engine); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), src_dims, - format_m); - dnn_shape_diff_src.SetTfDimOrder(dnn_shape_src.GetDimension(), - tensor_format_); + if (dnn_shape_src.IsMklTensor()) { + dnn_shape_diff_src.SetTfLayout( + dnn_shape_src.GetDimension(), + src_dims, + dnn_shape_src.GetTfDataFormat()); + dnn_shape_diff_src.SetTfDimOrder( + dnn_shape_src.GetDimension(), + tensor_format_); + } else { + dnn_shape_diff_src.SetTfLayout( + dnn_shape_diff_dst.GetDimension(), + src_dims, + dnn_shape_diff_dst.GetTfDataFormat()); + dnn_shape_diff_src.SetTfDimOrder( + dnn_shape_diff_dst.GetDimension(), + tensor_format_); + } tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); + // both src and diff_dst are tf layout, + // so get tf shape from anyont should be ok tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, kDiffSrcIndex, &diff_src_tensor, tf_shape_diff_src, dnn_shape_diff_src); - diff_src.SetUsrMem(src_md, diff_src_tensor); + // set diff_src + diff_src.SetUsrMem(common_md, diff_src_tensor); prop_kind pk = prop_kind::backward; auto bnrm_bwd_desc = batch_normalization_backward::desc( - pk, diff_src.GetUsrMemDesc(), src.GetUsrMemDesc(), epsilon_, + pk, common_md, common_md, epsilon_, /* for inference, specify use_global_stats 1. on fwd prop, use mean and variance provided as inputs @@ -1245,11 +1274,16 @@ class MklFusedBatchNormGradOp : public OpKernel { auto bnrm_bwd_pd = batch_normalization_backward::primitive_desc( bnrm_bwd_desc, cpu_engine, bnrm_fwd_pd); + std::vector net; + src.CheckReorderToOpMem(memory::primitive_desc(common_md, + cpu_engine), &net); + diff_dst.CheckReorderToOpMem(memory::primitive_desc(common_md, + cpu_engine), &net); + auto bnrm_bwd_op = batch_normalization_backward( bnrm_bwd_pd, src.GetOpMem(), mean.GetOpMem(), variance.GetOpMem(), diff_dst.GetOpMem(), weights_m, diff_src.GetOpMem(), diff_weights_m); - std::vector net; net.push_back(bnrm_bwd_op); stream(stream::kind::eager).submit(net).wait(); diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 51db3991e2..924b9da7e0 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -368,8 +368,11 @@ void MklReluGradOp::Compute(OpKernelContext* context) { mkl_context.MklCleanup(); } + + #else // INTEL_MKL_ML + template class MklReluOpBase : public OpKernel { public: @@ -579,17 +582,26 @@ class MklReluGradOpBase : public OpKernel { // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor() || + dnn_shape_diff_dst.IsMklTensor()) { dnn_shape_diff_src.SetMklTensor(true); auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc(); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), - dnn_shape_src.GetSizesAsMklDnnDims(), - dnn_shape_src.GetTfDataFormat()); + if (dnn_shape_src.IsMklTensor()) { + dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), + dnn_shape_src.GetSizesAsMklDnnDims(), + dnn_shape_src.GetTfDataFormat()); + } else { + dnn_shape_diff_src.SetTfLayout(dnn_shape_diff_dst.GetDimension(), + dnn_shape_diff_dst.GetSizesAsMklDnnDims(), + dnn_shape_diff_dst.GetTfDataFormat()); + } tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); + // both src and diff_dst are tf layout, + // so get tf shape from anyone should be ok tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, -- GitLab From 4456916672c32589d6681d43f72bb36b6bb64836 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 00:38:42 -0800 Subject: [PATCH 163/629] Fix the comment for the return type _dnn_model_fn and _dnn_linear_combined_model_fn. PiperOrigin-RevId: 185651142 --- tensorflow/python/estimator/canned/dnn.py | 4 +--- tensorflow/python/estimator/canned/dnn_linear_combined.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/estimator/canned/dnn.py b/tensorflow/python/estimator/canned/dnn.py index c29b5cabc7..7043da8de0 100644 --- a/tensorflow/python/estimator/canned/dnn.py +++ b/tensorflow/python/estimator/canned/dnn.py @@ -150,9 +150,7 @@ def _dnn_model_fn(features, config: `RunConfig` object to configure the runtime settings. Returns: - predictions: A dict of `Tensor` objects. - loss: A scalar containing the loss of the step. - train_op: The op for training. + An `EstimatorSpec` instance. Raises: ValueError: If features has the wrong type. diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined.py b/tensorflow/python/estimator/canned/dnn_linear_combined.py index 0c54013a52..6d0fb96057 100644 --- a/tensorflow/python/estimator/canned/dnn_linear_combined.py +++ b/tensorflow/python/estimator/canned/dnn_linear_combined.py @@ -117,7 +117,7 @@ def _dnn_linear_combined_model_fn(features, config: `RunConfig` object to configure the runtime settings. Returns: - `ModelFnOps` + An `EstimatorSpec` instance. Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` -- GitLab From 7f06d633e58ba37cbf654c1371135100260f20d8 Mon Sep 17 00:00:00 2001 From: Ian Langmore Date: Wed, 14 Feb 2018 04:02:15 -0800 Subject: [PATCH 164/629] effective_sample_size kwarg change (same default behavior). * rename max_lags --> filter_beyond_lag * rename max_lags_threshold --> filter_threshold * Users can use both filters, and they combine in an "OR" manner * None ==> turn off a filter. PiperOrigin-RevId: 185666926 --- .../kernel_tests/mcmc_diagnostics_test.py | 105 ++++++++++++++---- .../python/ops/mcmc_diagnostics_impl.py | 81 +++++++------- 2 files changed, 123 insertions(+), 63 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/mcmc_diagnostics_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/mcmc_diagnostics_test.py index d68fc9081a..52e36e135d 100644 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/mcmc_diagnostics_test.py +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/mcmc_diagnostics_test.py @@ -41,12 +41,14 @@ class _EffectiveSampleSizeTest(object): sess, atol=1e-2, rtol=1e-2, - max_lags_threshold=None, - max_lags=None): + filter_threshold=None, + filter_beyond_lag=None): x = array_ops.placeholder_with_default( input=x_, shape=x_.shape if self.use_static_shape else None) ess = mcmc_diagnostics.effective_sample_size( - x, max_lags_threshold=max_lags_threshold, max_lags=max_lags) + x, + filter_threshold=filter_threshold, + filter_beyond_lag=filter_beyond_lag) if self.use_static_shape: self.assertAllEqual(x.shape[1:], ess.shape) @@ -56,18 +58,19 @@ class _EffectiveSampleSizeTest(object): np.ones_like(ess_) * expected_ess, ess_, atol=atol, rtol=rtol) def testIidRank1NormalHasFullEssMaxLags10(self): - # With a length 5000 iid normal sequence, and max_lags = 10, we should - # have a good estimate of ESS, and it should be close to the full sequence - # length of 5000. - # The choice of max_lags = 10 is a short cutoff, reasonable only since we - # know the correlation length should be zero right away. + # With a length 5000 iid normal sequence, and filter_beyond_lag = 10, we + # should have a good estimate of ESS, and it should be close to the full + # sequence length of 5000. + # The choice of filter_beyond_lag = 10 is a short cutoff, reasonable only + # since we know the correlation length should be zero right away. with self.test_session() as sess: with spectral_ops_test_util.fft_kernel_label_map(): self._check_versus_expected_effective_sample_size( x_=rng.randn(5000).astype(np.float32), expected_ess=5000, sess=sess, - max_lags=10, + filter_beyond_lag=10, + filter_threshold=None, rtol=0.3) def testIidRank2NormalHasFullEssMaxLags10(self): @@ -78,23 +81,25 @@ class _EffectiveSampleSizeTest(object): x_=rng.randn(5000, 2).astype(np.float32), expected_ess=5000, sess=sess, - max_lags=10, + filter_beyond_lag=10, + filter_threshold=None, rtol=0.3) def testIidRank1NormalHasFullEssMaxLagThresholdZero(self): - # With a length 5000 iid normal sequence, and max_lags_threshold = 0, + # With a length 5000 iid normal sequence, and filter_threshold = 0, # we should have a super-duper estimate of ESS, and it should be very close # to the full sequence length of 5000. - # The choice of max_lags_cutoff = 0 means we cutoff as soon as the auto-corr - # is below zero. This should happen very quickly, due to the fact that the - # theoretical auto-corr is [1, 0, 0,...] + # The choice of filter_beyond_lag = 0 means we cutoff as soon as the + # auto-corris below zero. This should happen very quickly, due to the fact + # that the theoretical auto-corr is [1, 0, 0,...] with self.test_session() as sess: with spectral_ops_test_util.fft_kernel_label_map(): self._check_versus_expected_effective_sample_size( x_=rng.randn(5000).astype(np.float32), expected_ess=5000, sess=sess, - max_lags_threshold=0., + filter_beyond_lag=None, + filter_threshold=0., rtol=0.1) def testIidRank2NormalHasFullEssMaxLagThresholdZero(self): @@ -105,7 +110,8 @@ class _EffectiveSampleSizeTest(object): x_=rng.randn(5000, 2).astype(np.float32), expected_ess=5000, sess=sess, - max_lags_threshold=0., + filter_beyond_lag=None, + filter_threshold=0., rtol=0.1) def testLength10CorrelationHasEssOneTenthTotalLengthUsingMaxLags50(self): @@ -121,7 +127,8 @@ class _EffectiveSampleSizeTest(object): x_=x_, expected_ess=50000 // 10, sess=sess, - max_lags=50, + filter_beyond_lag=50, + filter_threshold=None, rtol=0.2) def testLength10CorrelationHasEssOneTenthTotalLengthUsingMaxLagsThresholdZero( @@ -138,7 +145,8 @@ class _EffectiveSampleSizeTest(object): x_=x_, expected_ess=50000 // 10, sess=sess, - max_lags_threshold=0., + filter_beyond_lag=None, + filter_threshold=0., rtol=0.1) def testListArgs(self): @@ -148,16 +156,16 @@ class _EffectiveSampleSizeTest(object): x_ = (iid_x_ * np.ones((5000, 10)).astype(np.float32)).reshape((50000,)) y_ = rng.randn(50000).astype(np.float32) states = [x_, x_, y_, y_] - max_lags_threshold = [0., None, 0., None] - max_lags = [None, 5, None, 5] + filter_threshold = [0., None, 0., None] + filter_beyond_lag = [None, 5, None, 5] # See other tests for reasoning on tolerance. with self.test_session() as sess: with spectral_ops_test_util.fft_kernel_label_map(): ess = mcmc_diagnostics.effective_sample_size( states, - max_lags_threshold=max_lags_threshold, - max_lags=max_lags) + filter_threshold=filter_threshold, + filter_beyond_lag=filter_beyond_lag) ess_ = sess.run(ess) self.assertAllEqual(4, len(ess_)) @@ -166,6 +174,59 @@ class _EffectiveSampleSizeTest(object): self.assertAllClose(50000, ess_[2], rtol=0.1) self.assertAllClose(50000, ess_[3], rtol=0.1) + def testMaxLagsThresholdLessThanNeg1SameAsNone(self): + # Setting both means we filter out items R_k from the auto-correlation + # sequence if k > filter_beyond_lag OR k >= j where R_j < filter_threshold. + + # x_ has correlation length 10. + iid_x_ = rng.randn(500, 1).astype(np.float32) + x_ = (iid_x_ * np.ones((500, 10)).astype(np.float32)).reshape((5000,)) + with self.test_session() as sess: + with spectral_ops_test_util.fft_kernel_label_map(): + x = array_ops.placeholder_with_default( + input=x_, shape=x_.shape if self.use_static_shape else None) + + ess_none_none = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=None, filter_beyond_lag=None) + ess_none_200 = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=None, filter_beyond_lag=200) + ess_neg2_200 = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=-2., filter_beyond_lag=200) + ess_neg2_none = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=-2., filter_beyond_lag=None) + ess_none_none_, ess_none_200_, ess_neg2_200_, ess_neg2_none_ = sess.run( + [ess_none_none, ess_none_200, ess_neg2_200, ess_neg2_none]) + + # filter_threshold=-2 <==> filter_threshold=None. + self.assertAllClose(ess_none_none_, ess_neg2_none_) + self.assertAllClose(ess_none_200_, ess_neg2_200_) + + def testMaxLagsArgsAddInAnOrManner(self): + # Setting both means we filter out items R_k from the auto-correlation + # sequence if k > filter_beyond_lag OR k >= j where R_j < filter_threshold. + + # x_ has correlation length 10. + iid_x_ = rng.randn(500, 1).astype(np.float32) + x_ = (iid_x_ * np.ones((500, 10)).astype(np.float32)).reshape((5000,)) + with self.test_session() as sess: + with spectral_ops_test_util.fft_kernel_label_map(): + x = array_ops.placeholder_with_default( + input=x_, shape=x_.shape if self.use_static_shape else None) + + ess_1_9 = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=1., filter_beyond_lag=9) + ess_1_none = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=1., filter_beyond_lag=None) + ess_none_9 = mcmc_diagnostics.effective_sample_size( + x, filter_threshold=1., filter_beyond_lag=9) + ess_1_9_, ess_1_none_, ess_none_9_ = sess.run( + [ess_1_9, ess_1_none, ess_none_9]) + + # Since R_k = 1 for k < 10, and R_k < 1 for k >= 10, + # filter_threshold = 1 <==> filter_beyond_lag = 9. + self.assertAllClose(ess_1_9_, ess_1_none_) + self.assertAllClose(ess_1_9_, ess_none_9_) + class EffectiveSampleSizeStaticTest(test.TestCase, _EffectiveSampleSizeTest): diff --git a/tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics_impl.py b/tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics_impl.py index bb8b915a9b..0424b6952b 100644 --- a/tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics_impl.py @@ -36,13 +36,13 @@ __all__ = [ def effective_sample_size(states, - max_lags_threshold=None, - max_lags=None, + filter_threshold=0., + filter_beyond_lag=None, name=None): """Estimate a lower bound on effective sample size for each independent chain. - Roughly speaking, the "effective sample size" (ESS) is the size of an iid - sample with the same variance as `state`. + Roughly speaking, "effective sample size" (ESS) is the size of an iid sample + with the same variance as `state`. More precisely, given a stationary sequence of possibly correlated random variables `X_1, X_2,...,X_N`, each identically distributed ESS is the number @@ -87,21 +87,28 @@ def effective_sample_size(states, This function estimates the above by first estimating the auto-correlation. Since `R_k` must be estimated using only `N - k` samples, it becomes progressively noisier for larger `k`. For this reason, the summation over - `R_k` should be truncated at some number `max_lags < N`. Since many MCMC - methods generate chains where `R_k > 0`, a reasonable critera is to truncate - at the first index where the estimated auto-correlation becomes negative. + `R_k` should be truncated at some number `filter_beyond_lag < N`. Since many + MCMC methods generate chains where `R_k > 0`, a reasonable critera is to + truncate at the first index where the estimated auto-correlation becomes + negative. + + The arguments `filter_beyond_lag`, `filter_threshold` are filters intended to + remove noisy tail terms from `R_k`. They combine in an "OR" manner meaning + terms are removed if they were to be filtered under the `filter_beyond_lag` OR + `filter_threshold` criteria. Args: states: `Tensor` or list of `Tensor` objects. Dimension zero should index identically distributed states. - max_lags_threshold: `Tensor` or list of `Tensor` objects. + filter_threshold: `Tensor` or list of `Tensor` objects. Must broadcast with `state`. The auto-correlation sequence is truncated - after the first appearance of a term less than `max_lags_threshold`. If - both `max_lags` and `max_lags_threshold` are `None`, - `max_lags_threshold` defaults to `0`. - max_lags: `Tensor` or list of `Tensor` objects. Must be `int`-like and - scalar valued. The auto-correlation sequence is truncated to this length. - May be provided only if `max_lags_threshold` is not. + after the first appearance of a term less than `filter_threshold`. + Setting to `None` means we use no threshold filter. Since `|R_k| <= 1`, + setting to any number less than `-1` has the same effect. + filter_beyond_lag: `Tensor` or list of `Tensor` objects. Must be + `int`-like and scalar valued. The auto-correlation sequence is truncated + to this length. Setting to `None` means we do not filter based on number + of lags. name: `String` name to prepend to created ops. Returns: @@ -109,8 +116,8 @@ def effective_sample_size(states, each component of `states`. Shape will be `states.shape[1:]`. Raises: - ValueError: If `states` and `max_lags_threshold` or `states` and `max_lags` - are both lists with different lengths. + ValueError: If `states` and `filter_threshold` or `states` and + `filter_beyond_lag` are both lists with different lengths. """ states_was_list = _is_list_like(states) @@ -118,15 +125,16 @@ def effective_sample_size(states, if not states_was_list: states = [states] - max_lags = _broadcast_maybelist_arg(states, max_lags, "max_lags") - max_lags_threshold = _broadcast_maybelist_arg(states, max_lags_threshold, - "max_lags_threshold") + filter_beyond_lag = _broadcast_maybelist_arg(states, filter_beyond_lag, + "filter_beyond_lag") + filter_threshold = _broadcast_maybelist_arg(states, filter_threshold, + "filter_threshold") # Process items, one at a time. with ops.name_scope(name, "effective_sample_size"): ess_list = [ _effective_sample_size_single_state(s, ml, mlt) - for (s, ml, mlt) in zip(states, max_lags, max_lags_threshold) + for (s, ml, mlt) in zip(states, filter_beyond_lag, filter_threshold) ] if states_was_list: @@ -134,38 +142,31 @@ def effective_sample_size(states, return ess_list[0] -def _effective_sample_size_single_state(states, max_lags, max_lags_threshold): +def _effective_sample_size_single_state(states, filter_beyond_lag, + filter_threshold): """ESS computation for one single Tensor argument.""" - if max_lags is not None and max_lags_threshold is not None: - raise ValueError( - "Expected at most one of max_lags, max_lags_threshold to be provided. " - "Found: {}, {}".format(max_lags, max_lags_threshold)) - - if max_lags_threshold is None: - max_lags_threshold = 0. with ops.name_scope( "effective_sample_size_single_state", - values=[states, max_lags, max_lags_threshold]): + values=[states, filter_beyond_lag, filter_threshold]): states = ops.convert_to_tensor(states, name="states") dt = states.dtype - if max_lags is not None: - auto_corr = sample_stats.auto_correlation( - states, axis=0, max_lags=max_lags) - elif max_lags_threshold is not None: - max_lags_threshold = ops.convert_to_tensor( - max_lags_threshold, dtype=dt, name="max_lags_threshold") - auto_corr = sample_stats.auto_correlation(states, axis=0) + # filter_beyond_lag == None ==> auto_corr is the full sequence. + auto_corr = sample_stats.auto_correlation( + states, axis=0, max_lags=filter_beyond_lag) + if filter_threshold is not None: + filter_threshold = ops.convert_to_tensor( + filter_threshold, dtype=dt, name="filter_threshold") # Get a binary mask to zero out values of auto_corr below the threshold. # mask[i, ...] = 1 if auto_corr[j, ...] > threshold for all j <= i, # mask[i, ...] = 0, otherwise. # So, along dimension zero, the mask will look like [1, 1, ..., 0, 0,...] # Building step by step, - # Assume auto_corr = [1, 0.5, 0.0, 0.3], and max_lags_threshold = 0.2. + # Assume auto_corr = [1, 0.5, 0.0, 0.3], and filter_threshold = 0.2. # Step 1: mask = [False, False, True, False] - mask = auto_corr < max_lags_threshold + mask = auto_corr < filter_threshold # Step 2: mask = [0, 0, 1, 1] mask = math_ops.cast(mask, dtype=dt) # Step 3: mask = [0, 0, 1, 2] @@ -173,14 +174,12 @@ def _effective_sample_size_single_state(states, max_lags, max_lags_threshold): # Step 4: mask = [1, 1, 0, 0] mask = math_ops.maximum(1. - mask, 0.) auto_corr *= mask - else: - auto_corr = sample_stats.auto_correlation(states, axis=0) # With R[k] := auto_corr[k, ...], # ESS = N / {1 + 2 * Sum_{k=1}^N (N - k) / N * R[k]} # = N / {-1 + 2 * Sum_{k=0}^N (N - k) / N * R[k]} (since R[0] = 1) # approx N / {-1 + 2 * Sum_{k=0}^M (N - k) / N * R[k]} - #, where M is the max_lags truncation point chosen above. + # where M is the filter_beyond_lag truncation point chosen above. # Get the factor (N - k) / N, and give it shape [M, 1,...,1], having total # ndims the same as auto_corr -- GitLab From 0f689f8a18458b265d7290a496bf3e400332f247 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Wed, 14 Feb 2018 08:18:10 -0800 Subject: [PATCH 165/629] Indentation fix. --- tensorflow/python/ops/image_ops_impl.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 79acd3dbef..451a679c1a 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -419,13 +419,13 @@ def _rot90_3D(image, k, name_scope): return array_ops.reverse_v2(image, [0, 1]) def _rot270(): return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), - [1]) + [1]) cases = [(math_ops.equal(k, 1), _rot90), (math_ops.equal(k, 2), _rot180), (math_ops.equal(k, 3), _rot270)] result = control_flow_ops.case(cases, default=lambda: image, exclusive=True, - name=name_scope) + name=name_scope) result.set_shape([None, None, image.get_shape()[2]]) return result @@ -434,7 +434,8 @@ def _rot90_4D(images, k, name_scope): Args: images: 4-D Tensor of shape `[height, width, channels]`. - k: A scalar integer. The number of times the images are rotated by 90 degrees. + k: A scalar integer. The number of times the images are rotated by 90 + degrees. name_scope: A valid TensorFlow name scope. Returns: @@ -455,7 +456,7 @@ def _rot90_4D(images, k, name_scope): (math_ops.equal(k, 3), _rot270)] result = control_flow_ops.case(cases, default=lambda: images, exclusive=True, - name=name_scope) + name=name_scope) shape = result.get_shape() result.set_shape([shape[0], None, None, shape[3]]) return result @@ -1121,9 +1122,9 @@ def adjust_contrast(images, contrast_factor): def adjust_gamma(image, gamma=1, gain=1): """Performs Gamma Correction on the input image. - Also known as Power Law Transform. This function transforms the - input image pixelwise according to the equation Out = In**gamma - after scaling each pixel to the range 0 to 1. + Also known as Power Law Transform. This function transforms the + input image pixelwise according to the equation `Out = In**gamma` + after scaling each pixel to the range 0 to 1. Args: image : A Tensor. -- GitLab From 3512986548d1914054a957841e58831c54729475 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 08:18:17 -0800 Subject: [PATCH 166/629] Build file bug fix for iOS simulators. PiperOrigin-RevId: 185688662 --- tensorflow/contrib/lite/kernels/internal/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index a6ccc99a51..f47fb04cba 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -345,6 +345,9 @@ cc_library( ":ios_arm64": [ ":neon_tensor_utils", ], + ":ios_x86_64": [ + ":neon_tensor_utils", + ], ":x86_64": [ ":neon_tensor_utils", ], -- GitLab From 6f764de530c31ab1d5f08a4c07c6cb0fa2e492b4 Mon Sep 17 00:00:00 2001 From: Noah Eisen Date: Wed, 14 Feb 2018 08:18:41 -0800 Subject: [PATCH 167/629] Decreases number of gRPC polling threads from 8 to 2. PiperOrigin-RevId: 185688704 --- tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc | 2 +- tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc index bb14e0197b..2ed07e3669 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc @@ -34,7 +34,7 @@ namespace { class GrpcWorkerCache : public WorkerCachePartial { public: // TODO(ncteisen): consider adding a config var or flag for this - static constexpr const size_t kGrpcWorkerCacheThreadCount = 8; + static constexpr const size_t kGrpcWorkerCacheThreadCount = 2; explicit GrpcWorkerCache(GrpcChannelCache* channel_cache, WorkerInterface* local_worker, diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc index b20e744a97..1beb198732 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc @@ -52,7 +52,7 @@ namespace { class GrpcWorkerService : public AsyncServiceInterface { // TODO(ncteisen): consider adding a config var or flag for this - static constexpr const size_t kGrpcWorkerServiceThreadCount = 8; + static constexpr const size_t kGrpcWorkerServiceThreadCount = 2; public: GrpcWorkerService(GrpcWorker* worker, ::grpc::ServerBuilder* builder) -- GitLab From c88617f0ec128a3012f011df9d3febbec5a8c3f4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 09:10:46 -0800 Subject: [PATCH 168/629] Canonicalize list comprehensions into equivalent for and if statements. PiperOrigin-RevId: 185695579 --- tensorflow/contrib/py2tf/converters/BUILD | 12 +++ .../py2tf/converters/list_comprehension.py | 80 +++++++++++++++++++ .../converters/list_comprehension_test.py | 75 +++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 tensorflow/contrib/py2tf/converters/list_comprehension.py create mode 100644 tensorflow/contrib/py2tf/converters/list_comprehension_test.py diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 62de8107a3..b6574fc183 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -25,6 +25,7 @@ py_library( "control_flow.py", "decorators.py", "for_canonicalization.py", + "list_comprehension.py", "logical_expressions.py", "side_effect_guards.py", ], @@ -138,6 +139,17 @@ py_test( ], ) +py_test( + name = "list_comprehension_test", + srcs = ["list_comprehension_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":test_lib", + "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/python:client_testlib", + ], +) + py_test( name = "logical_expressions_test", srcs = ["logical_expressions_test.py"], diff --git a/tensorflow/contrib/py2tf/converters/list_comprehension.py b/tensorflow/contrib/py2tf/converters/list_comprehension.py new file mode 100644 index 0000000000..e874483110 --- /dev/null +++ b/tensorflow/contrib/py2tf/converters/list_comprehension.py @@ -0,0 +1,80 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Canonicalizing list comprehensions into for and if statements. + +e.g. +result = [x * x for x in xs] + +becomes + +result = [] +for x in xs: + elt = x * x + result.append(elt) +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import gast + +from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.pyct import templates +from tensorflow.contrib.py2tf.pyct import transformer + + +class ListCompCanonicalizationTransformer(transformer.Base): + """NodeTransformer to canonicalize list comprehensions.""" + + def __init__(self, context): + super(ListCompCanonicalizationTransformer, self).__init__(context) + + def make_update_list_node(self, list_, elt): + return templates.replace('list_.append(elt)', list_=list_, elt=elt)[0] + + def instantiate_list_node(self): + return parser.parse_str('[]').body[0].value + + def visit_Assign(self, node): + if not isinstance(node.value, gast.ListComp): + return node + if len(node.targets) > 1: + raise ValueError('Only support single assignment.') + return self.canonicalize_listcomp(node.targets[0], node.value) + + def canonicalize_listcomp(self, result_node, list_comp_node): + + make_list = templates.replace( + 'list_ = create_list', + list_=result_node, + create_list=self.instantiate_list_node()) + loop_body = self.make_update_list_node(result_node, list_comp_node.elt) + + for gen in reversed(list_comp_node.generators): + for gen_if in reversed(gen.ifs): + loop_body = templates.replace( + 'if test: loop_body', test=gen_if, loop_body=loop_body) + loop_body = templates.replace( + 'for target in iter_: loop_body', + iter_=gen.iter, + target=gen.target, + loop_body=loop_body) + + return make_list + loop_body + + +def transform(node, context): + return ListCompCanonicalizationTransformer(context).visit(node) diff --git a/tensorflow/contrib/py2tf/converters/list_comprehension_test.py b/tensorflow/contrib/py2tf/converters/list_comprehension_test.py new file mode 100644 index 0000000000..025fac11e4 --- /dev/null +++ b/tensorflow/contrib/py2tf/converters/list_comprehension_test.py @@ -0,0 +1,75 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for list_comprehension module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.py2tf.converters import converter_test_base +from tensorflow.contrib.py2tf.converters import list_comprehension +from tensorflow.python.platform import test + + +class ListCompTest(converter_test_base.TestCase): + + def test_basic(self): + + def test_fn(l): + s = [e * e for e in l] + return s + + node = self.parse_and_analyze(test_fn, {}) + node = list_comprehension.transform(node, self.ctx) + + with self.compiled(node) as result: + l = [1, 2, 3] + self.assertEqual(test_fn(l), result.test_fn(l)) + l = [] + self.assertEqual(test_fn(l), result.test_fn(l)) + + def test_multiple_generators(self): + + def test_fn(l): + s = [e * e for sublist in l for e in sublist] + return s + + node = self.parse_and_analyze(test_fn, {}) + node = list_comprehension.transform(node, self.ctx) + + with self.compiled(node) as result: + l = [[1], [2], [3]] + self.assertEqual(test_fn(l), result.test_fn(l)) + l = [] + self.assertEqual(test_fn(l), result.test_fn(l)) + + def test_conds(self): + + def test_fn(l): + s = [e * e for e in l if e > 1] + return s + + node = self.parse_and_analyze(test_fn, {}) + node = list_comprehension.transform(node, self.ctx) + + with self.compiled(node) as result: + l = [1, 2, 3] + self.assertEqual(test_fn(l), result.test_fn(l)) + l = [] + self.assertEqual(test_fn(l), result.test_fn(l)) + + +if __name__ == '__main__': + test.main() -- GitLab From 046c5ee65678ed188aa798d710c97c1c4da9e835 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 14 Feb 2018 09:50:21 -0800 Subject: [PATCH 169/629] API incompatibility fix in _UnreadVariable PiperOrigin-RevId: 185700704 --- tensorflow/python/ops/resource_variable_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 9ab789abad..62c65c1dcf 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -933,6 +933,7 @@ class _UnreadVariable(ResourceVariable): return gen_resource_variable_ops.read_variable_op(self._handle, self._dtype) + @property def op(self): """The op for this variable.""" return self._parent_op -- GitLab From cc617064703957aa699bc01c65c7f03f1e6f6b22 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 10:14:58 -0800 Subject: [PATCH 170/629] Factor VectorOfTensors out of concatenation.cc PiperOrigin-RevId: 185704744 --- .../contrib/lite/kernels/concatenation.cc | 34 +------------- .../contrib/lite/kernels/internal/tensor.h | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+), 33 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/concatenation.cc b/tensorflow/contrib/lite/kernels/concatenation.cc index 7ff9075318..a619ada86a 100644 --- a/tensorflow/contrib/lite/kernels/concatenation.cc +++ b/tensorflow/contrib/lite/kernels/concatenation.cc @@ -96,38 +96,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return context->ResizeTensor(context, output, output_size); } -template -class VectorOfInputs { - public: - VectorOfInputs(const TfLiteContext& context, const TfLiteIntArray& inputs) { - int num_inputs = inputs.size; - - all_data_.reserve(num_inputs); - all_dims_.reserve(num_inputs); - all_dims_ptr_.reserve(num_inputs); - - for (int i = 0; i < num_inputs; ++i) { - TfLiteTensor* input = &context.tensors[inputs.data[i]]; - all_data_.push_back(GetTensorData(input)); - all_dims_.push_back(GetTensorDims(input)); - } - - // Taking the pointer from inside a std::vector is only OK if the vector is - // never modified, so we populate all_dims in the previous loop and then we - // are free to grab iterators here. - for (int i = 0; i < num_inputs; ++i) { - all_dims_ptr_.push_back(&all_dims_[i]); - } - } - const T* const* data() const { return all_data_.data(); } - const Dims<4>* const* dims() const { return all_dims_ptr_.data(); } - - private: - std::vector all_data_; - std::vector> all_dims_; - std::vector*> all_dims_ptr_; -}; - template TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = @@ -141,7 +109,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // TODO(ycling): Activation function parameter is ignored. For now we dont have // a model with a Concatenation with fused activation function. #define TF_LITE_CONCATENATION(type, scalar) \ - VectorOfInputs all_inputs(*context, *node->inputs); \ + VectorOfTensors all_inputs(*context, *node->inputs); \ type::Concatenation( \ RemapDim(NumDimensions(output), axis), all_inputs.data(), \ all_inputs.dims(), node->inputs->size, GetTensorData(output), \ diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h index dfe76c2afd..62e38e0d4c 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor.h +++ b/tensorflow/contrib/lite/kernels/internal/tensor.h @@ -81,6 +81,51 @@ inline Dims<4> GetTensorDims(const TfLiteTensor* tensor) { return GetTensorDims(dims->data, dims->size); } +// A list of tensors in a format that can be used by kernels like split and +// concatenation. +template +class VectorOfTensors { + public: + // Build with the tensors in 'tensor_list'. + VectorOfTensors(const TfLiteContext& context, + const TfLiteIntArray& tensor_list) { + int num_tensors = tensor_list.size; + + all_data_.reserve(num_tensors); + all_dims_.reserve(num_tensors); + all_dims_ptr_.reserve(num_tensors); + + for (int i = 0; i < num_tensors; ++i) { + TfLiteTensor* t = &context.tensors[tensor_list.data[i]]; + all_data_.push_back(GetTensorData(t)); + all_dims_.push_back(GetTensorDims(t)); + } + + // Taking the pointer from inside a std::vector is only OK if the vector is + // never modified, so we populate all_dims in the previous loop and then we + // are free to grab iterators here. + for (int i = 0; i < num_tensors; ++i) { + all_dims_ptr_.push_back(&all_dims_[i]); + } + } + // Return a pointer to the data pointers of all tensors in the list. For + // example: + // float* const* f = v.data(); + // f[0][1] is the second element of the first tensor. + T* const* data() const { return all_data_.data(); } + + // Return a pointer the dim pointers of all tensors in the list. For + // example: + // const Dims<4>* const* d = v.dims(); + // dims[1] are the dimensions of the second tensor in the list. + const Dims<4>* const* dims() const { return all_dims_ptr_.data(); } + + private: + std::vector all_data_; + std::vector> all_dims_; + std::vector*> all_dims_ptr_; +}; + } // namespace tflite #endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TENSOR_H_ -- GitLab From c174994e74b55cdcfcab82fc55de97c3e54e9ea8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 10:37:32 -0800 Subject: [PATCH 171/629] Tensorflow driver to execute the given tf graphdef, and generate output PiperOrigin-RevId: 185708396 --- tensorflow/contrib/lite/BUILD | 1 + tensorflow/contrib/lite/testdata/multi_add.pb | 26 +++ tensorflow/contrib/lite/testing/BUILD | 26 +++ tensorflow/contrib/lite/testing/tf_driver.cc | 189 ++++++++++++++++++ tensorflow/contrib/lite/testing/tf_driver.h | 75 +++++++ .../contrib/lite/testing/tf_driver_test.cc | 56 ++++++ 6 files changed, 373 insertions(+) create mode 100644 tensorflow/contrib/lite/testdata/multi_add.pb create mode 100644 tensorflow/contrib/lite/testing/tf_driver.cc create mode 100644 tensorflow/contrib/lite/testing/tf_driver.h create mode 100644 tensorflow/contrib/lite/testing/tf_driver_test.cc diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 3520a4eaf0..44c4a7e2ca 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -10,6 +10,7 @@ exports_files(["LICENSE"]) exports_files(glob([ "testdata/*.bin", + "testdata/*.pb", "models/testdata/*", ])) diff --git a/tensorflow/contrib/lite/testdata/multi_add.pb b/tensorflow/contrib/lite/testdata/multi_add.pb new file mode 100644 index 0000000000..e95a20841f --- /dev/null +++ b/tensorflow/contrib/lite/testdata/multi_add.pb @@ -0,0 +1,26 @@ + +I +a Placeholder" /device:CPU:0* +shape:* +dtype0 +I +b Placeholder" /device:CPU:0* +dtype0* +shape: +I +c Placeholder" /device:CPU:0* +dtype0* +shape: +I +d Placeholder" /device:CPU:0* +dtype0* +shape: +& +iAddbc" /device:CPU:0* +T0 +& +xAddai" /device:CPU:0* +T0 +& +yAdddi" /device:CPU:0* +T0" \ No newline at end of file diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index b949045128..9351cf9d64 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -195,6 +195,32 @@ cc_binary( ], ) +cc_library( + name = "tf_driver", + srcs = ["tf_driver.cc"], + hdrs = ["tf_driver.h"], + deps = [ + ":split", + ":test_runner", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + ], +) + +cc_test( + name = "tf_driver_test", + size = "small", + srcs = ["tf_driver_test.cc"], + data = ["//tensorflow/contrib/lite:testdata/multi_add.pb"], + deps = [ + ":tf_driver", + "@com_google_googletest//:gtest_main", + ], +) + tf_cc_test( name = "generated_examples_zip_test", size = "large", diff --git a/tensorflow/contrib/lite/testing/tf_driver.cc b/tensorflow/contrib/lite/testing/tf_driver.cc new file mode 100644 index 0000000000..da6c6ce7b1 --- /dev/null +++ b/tensorflow/contrib/lite/testing/tf_driver.cc @@ -0,0 +1,189 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/tf_driver.h" + +#include +#include + +#include "tensorflow/contrib/lite/testing/split.h" +#include "tensorflow/core/lib/gtl/array_slice.h" + +namespace tflite { +namespace testing { + +namespace { + +tensorflow::Tensor CreateTensor(const tensorflow::DataType type, + const std::vector& dim) { + tensorflow::TensorShape shape{gtl::ArraySlice{ + reinterpret_cast(dim.data()), dim.size()}}; + return {type, shape}; +} + +template +void FillTensorWithData(tensorflow::Tensor* tensor, const string& csv_values) { + auto data = tensor->flat(); + + const auto& values = testing::Split(csv_values, ","); + for (int i = 0; i < values.size(); i++) { + data(i) = values[i]; + } +} + +template +void FillTensorWithZeros(tensorflow::Tensor* tensor) { + auto data = tensor->flat(); + for (int i = 0; i < tensor->NumElements(); i++) { + data(i) = 0; + } +} + +template +string TensorDataToCsvString(const tensorflow::Tensor& tensor) { + std::stringstream stream; + const auto& data = tensor.flat(); + if (data.size() == 0) { + return ""; + } + stream << data(0); + for (int i = 1; i < data.size(); i++) { + stream << "," << data(i); + } + return stream.str(); +} + +} // namespace + +TfDriver::TfDriver(const std::vector& input_layer, + const std::vector& input_layer_type, + const std::vector& input_layer_shape, + const std::vector& output_layer) + : input_names_(input_layer), output_names_(output_layer) { + CHECK_EQ(input_layer.size(), input_layer_type.size()); + CHECK_EQ(input_layer.size(), input_layer_shape.size()); + + input_ids_.resize(input_layer.size()); + input_tensors_.reserve(input_layer.size()); + input_types_.resize(input_layer.size()); + input_shapes_.resize(input_layer.size()); + for (int i = 0; i < input_layer.size(); i++) { + input_ids_[i] = i; + input_tensors_[input_layer[i]] = {}; + CHECK(DataTypeFromString(input_layer_type[i], &input_types_[i])); + input_shapes_[i] = Split(input_layer_shape[i], ","); + } + + output_ids_.resize(output_layer.size()); + output_tensors_.reserve(output_layer.size()); + for (int i = 0; i < output_layer.size(); i++) { + output_ids_[i] = i; + } +} + +void TfDriver::LoadModel(const string& bin_file_path) { + if (!IsValid()) return; + std::cout << std::endl << "Loading model: " << bin_file_path << std::endl; + std::ifstream model(bin_file_path); + if (model.fail()) { + Invalidate("Failed to find the model"); + return; + } + + tensorflow::GraphDef graphdef; + if (!graphdef.ParseFromIstream(&model)) { + Invalidate("Failed to parse tensorflow graphdef"); + return; + } + + tensorflow::SessionOptions options; + session_.reset(tensorflow::NewSession(options)); + auto status = session_->Create(graphdef); + if (!status.ok()) { + Invalidate("Failed to create session"); + } +} + +void TfDriver::SetInput(int id, const string& csv_values) { + if (!IsValid()) return; + + auto tensor = CreateTensor(input_types_[id], input_shapes_[id]); + switch (input_types_[id]) { + case tensorflow::DT_FLOAT: { + FillTensorWithData(&tensor, csv_values); + break; + } + case tensorflow::DT_INT32: { + FillTensorWithData(&tensor, csv_values); + break; + } + default: + fprintf(stderr, "Unsupported type %d in SetInput\n", input_types_[id]); + Invalidate("Unsupported tensor data type"); + return; + } + input_tensors_[input_names_[id]] = tensor; +} + +void TfDriver::ResetTensor(int id) { + if (!IsValid()) return; + auto tensor = input_tensors_[input_names_[id]]; + switch (input_types_[id]) { + case tensorflow::DT_FLOAT: { + FillTensorWithZeros(&tensor); + break; + } + case tensorflow::DT_INT32: { + FillTensorWithZeros(&tensor); + break; + } + default: + fprintf(stderr, "Unsupported type %d in ResetTensor\n", input_types_[id]); + Invalidate("Unsupported tensor data type"); + return; + } +} + +void TfDriver::ReshapeTensor(int id, const string& csv_values) { + input_shapes_[id] = Split(csv_values, ","); + input_tensors_[input_names_[id]] = + CreateTensor(input_types_[id], input_shapes_[id]); + ResetTensor(id); +} + +string TfDriver::ReadOutput(int id) { + if (!IsValid()) return ""; + switch (output_tensors_[id].dtype()) { + case tensorflow::DT_FLOAT: + return TensorDataToCsvString(output_tensors_[id]); + case tensorflow::DT_INT32: + return TensorDataToCsvString(output_tensors_[id]); + default: + fprintf(stderr, "Unsupported type %d in ResetTensor\n", input_types_[id]); + Invalidate("Unsupported tensor data type"); + return ""; + } +} + +void TfDriver::Invoke() { + if (!IsValid()) return; + auto status = session_->Run({input_tensors_.begin(), input_tensors_.end()}, + output_names_, {}, &output_tensors_); + if (!status.ok()) { + Invalidate("Failed to invoke interpreter"); + } +} + +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/tf_driver.h b/tensorflow/contrib/lite/testing/tf_driver.h new file mode 100644 index 0000000000..2928e57282 --- /dev/null +++ b/tensorflow/contrib/lite/testing/tf_driver.h @@ -0,0 +1,75 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_TESTING_TF_DRIVER_H_ +#define TENSORFLOW_CONTRIB_LITE_TESTING_TF_DRIVER_H_ + +#include +#include + +#include "tensorflow/contrib/lite/testing/split.h" +#include "tensorflow/contrib/lite/testing/test_runner.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/public/session.h" + +namespace tflite { +namespace testing { + +// A test runner that feeds inputs into Tensorflow and generates outputs. +class TfDriver : public TestRunner { + public: + explicit TfDriver(const std::vector& input_layer, + const std::vector& input_layer_type, + const std::vector& input_layer_shape, + const std::vector& output_layer); + ~TfDriver() override {} + + void LoadModel(const string& bin_file_path) override; + void SetInput(int id, const string& csv_values) override; + void Invoke() override; + string ReadOutput(int id); + + const std::vector& GetInputs() override { return input_ids_; } + const std::vector& GetOutputs() override { return output_ids_; } + void ReshapeTensor(int id, const string& csv_values) override; + // Note: ResetTensor only works for input tensor. + void ResetTensor(int id) override; + + // no-op. SetInput will overwrite existing data . + void AllocateTensors() override {} + // no-op. Tf driver is not supposed to check the results. + void SetExpectation(int id, const string& csv_values) override {} + // tf driver is not supposed to check the results. + bool CheckResults() override { return false; } + + private: + std::unique_ptr session_; + std::vector input_ids_; + std::vector input_names_; + std::vector> input_shapes_; + std::vector input_types_; + std::unordered_map input_tensors_; + + std::vector output_ids_; + std::vector output_names_; + std::vector<::tensorflow::Tensor> output_tensors_; +}; + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_TESTING_TF_DRIVER_H_ diff --git a/tensorflow/contrib/lite/testing/tf_driver_test.cc b/tensorflow/contrib/lite/testing/tf_driver_test.cc new file mode 100644 index 0000000000..c0faa4676a --- /dev/null +++ b/tensorflow/contrib/lite/testing/tf_driver_test.cc @@ -0,0 +1,56 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/tf_driver.h" + +#include +#include + +namespace tflite { +namespace testing { +namespace { + +using ::testing::ElementsAre; + +TEST(TfDriverTest, SimpleTest) { + std::unique_ptr runner( + new TfDriver({"a", "b", "c", "d"}, {"float", "float", "float", "float"}, + {"1,8,8,3", "1,8,8,3", "1,8,8,3", "1,8,8,3"}, {"x", "y"})); + + runner->LoadModel( + "third_party/tensorflow/contrib/lite/testdata/multi_add.pb"); + EXPECT_TRUE(runner->IsValid()) << runner->GetErrorMessage(); + + ASSERT_THAT(runner->GetInputs(), ElementsAre(0, 1, 2, 3)); + ASSERT_THAT(runner->GetOutputs(), ElementsAre(0, 1)); + + for (int i : {0, 1, 2, 3}) { + runner->ReshapeTensor(i, "1,2,2,1"); + } + ASSERT_TRUE(runner->IsValid()); + + runner->SetInput(0, "0.1,0.2,0.3,0.4"); + runner->SetInput(1, "0.001,0.002,0.003,0.004"); + runner->SetInput(2, "0.001,0.002,0.003,0.004"); + runner->SetInput(3, "0.01,0.02,0.03,0.04"); + runner->ResetTensor(2); + runner->Invoke(); + + ASSERT_EQ(runner->ReadOutput(0), "0.101,0.202,0.303,0.404"); + ASSERT_EQ(runner->ReadOutput(1), "0.011,0.022,0.033,0.044"); +} + +} // namespace +} // namespace testing +} // namespace tflite -- GitLab From 4d90af18b92eb804bce2c334e718fddc691df28e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 10:37:47 -0800 Subject: [PATCH 172/629] Adding specialized logic for depthwise convolution of depth 20. PiperOrigin-RevId: 185708424 --- .../internal/optimized/depthwiseconv_float.h | 41 +++++++++++++++ .../internal/optimized/depthwiseconv_uint8.h | 50 +++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h index e2c87df80b..7f6eea2d5d 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h @@ -573,6 +573,46 @@ struct FloatDepthwiseConvKernel { } }; +template <> +struct FloatDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const float* input_ptr, int input_ptr_increment, + const float* filter_ptr, float* acc_buffer_ptr) { + // Load the filters + float32x4_t filter_0 = vld1q_f32(filter_ptr + 4 * 0); + float32x4_t filter_1 = vld1q_f32(filter_ptr + 4 * 1); + float32x4_t filter_2 = vld1q_f32(filter_ptr + 4 * 2); + float32x4_t filter_3 = vld1q_f32(filter_ptr + 4 * 3); + float32x4_t filter_4 = vld1q_f32(filter_ptr + 4 * 4); + + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + // Load the inputs + const float input_val = *input_ptr; + input_ptr += input_ptr_increment; + // Load the accumulators from acc_buffer + float32x4_t acc_0 = vld1q_f32(acc_buffer_ptr + 4 * 0); + float32x4_t acc_1 = vld1q_f32(acc_buffer_ptr + 4 * 1); + float32x4_t acc_2 = vld1q_f32(acc_buffer_ptr + 4 * 2); + float32x4_t acc_3 = vld1q_f32(acc_buffer_ptr + 4 * 3); + float32x4_t acc_4 = vld1q_f32(acc_buffer_ptr + 4 * 4); + // Multiply-accumulate + acc_0 = vmlaq_n_f32(acc_0, filter_0, input_val); + acc_1 = vmlaq_n_f32(acc_1, filter_1, input_val); + acc_2 = vmlaq_n_f32(acc_2, filter_2, input_val); + acc_3 = vmlaq_n_f32(acc_3, filter_3, input_val); + acc_4 = vmlaq_n_f32(acc_4, filter_4, input_val); + // Store the accumulators back to acc_buffer + vst1q_f32(acc_buffer_ptr + 4 * 0, acc_0); + vst1q_f32(acc_buffer_ptr + 4 * 1, acc_1); + vst1q_f32(acc_buffer_ptr + 4 * 2, acc_2); + vst1q_f32(acc_buffer_ptr + 4 * 3, acc_3); + vst1q_f32(acc_buffer_ptr + 4 * 4, acc_4); + acc_buffer_ptr += 20; + } + } +}; + template <> struct FloatDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, @@ -926,6 +966,7 @@ inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1) TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 20) TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32) TFMINI_USE_DEPTHWISECONV_KERNEL(true, 2, 1) TFMINI_USE_DEPTHWISECONV_KERNEL(true, 3, 2) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index fc58978964..dbc4f0d6fd 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1205,6 +1205,55 @@ struct QuantizedDepthwiseConvKernel { } }; +template <> +struct QuantizedDepthwiseConvKernel { + static void Run(int num_output_pixels, int input_depth, int depth_multiplier, + const uint8* input_ptr, int16 input_offset, + int input_ptr_increment, const uint8* filter_ptr, + int16 filter_offset, int32* acc_buffer_ptr) { + // Load the filters, add filter_offset. + // NEON wants to load 8 bytes at a time, but 20 is not divisible by 8. + // We load the first 16 bytes into filter_u8_{0,1} as usual. + // Then we load the 8 last bytes into filter_u8_x (x for 'extra'). + // This is redundant: the first 4 bytes of filter_u8_x are the same + // as the last 4 bytes of filter_u8_x. + uint8x8_t filter_u8_0 = vld1_u8(filter_ptr + 8 * 0); + uint8x8_t filter_u8_1 = vld1_u8(filter_ptr + 8 * 1); + uint8x8_t filter_u8_x = vld1_u8(filter_ptr + 8 * 1 + 4); + int16x8_t filter_0 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_0)); + int16x8_t filter_1 = vreinterpretq_s16_u16(vmovl_u8(filter_u8_1)); + int16x8_t filter_x = vreinterpretq_s16_u16(vmovl_u8(filter_u8_x)); + filter_0 = vaddq_s16(filter_0, vdupq_n_s16(filter_offset)); + filter_1 = vaddq_s16(filter_1, vdupq_n_s16(filter_offset)); + filter_x = vaddq_s16(filter_x, vdupq_n_s16(filter_offset)); + // Handle one output pixel at a time. + for (int outp = 0; outp < num_output_pixels; outp++) { + uint8 input_u8 = *input_ptr; + input_ptr += input_ptr_increment; + int16 input = static_cast(input_u8 + input_offset); + // Load the accumulators from acc_buffer + int32x4_t acc_0 = vld1q_s32(acc_buffer_ptr + 4 * 0); + int32x4_t acc_1 = vld1q_s32(acc_buffer_ptr + 4 * 1); + int32x4_t acc_2 = vld1q_s32(acc_buffer_ptr + 4 * 2); + int32x4_t acc_3 = vld1q_s32(acc_buffer_ptr + 4 * 3); + int32x4_t acc_4 = vld1q_s32(acc_buffer_ptr + 4 * 4); + // Multiply-accumulate + acc_0 = vmlal_n_s16(acc_0, vget_low_s16(filter_0), input); + acc_1 = vmlal_n_s16(acc_1, vget_high_s16(filter_0), input); + acc_2 = vmlal_n_s16(acc_2, vget_low_s16(filter_1), input); + acc_3 = vmlal_n_s16(acc_3, vget_high_s16(filter_1), input); + acc_4 = vmlal_n_s16(acc_4, vget_high_s16(filter_x), input); + // Store the accumulators back to acc_buffer + vst1q_s32(acc_buffer_ptr + 4 * 0, acc_0); + vst1q_s32(acc_buffer_ptr + 4 * 1, acc_1); + vst1q_s32(acc_buffer_ptr + 4 * 2, acc_2); + vst1q_s32(acc_buffer_ptr + 4 * 3, acc_3); + vst1q_s32(acc_buffer_ptr + 4 * 4, acc_4); + acc_buffer_ptr += 20; + } + } +}; + template <> struct QuantizedDepthwiseConvKernel { static void Run(int num_output_pixels, int input_depth, int depth_multiplier, @@ -1691,6 +1740,7 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 2) TFMINI_USE_DEPTHWISECONV_KERNEL(true, 16, 1) TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 16) + TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 20) TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 32) TFMINI_USE_DEPTHWISECONV_KERNEL(true, 1, 8) TFMINI_USE_DEPTHWISECONV_KERNEL(true, 8, 1) -- GitLab From cedc74ee8ec418c39bac461e834636825163586a Mon Sep 17 00:00:00 2001 From: "David G. Andersen" Date: Wed, 14 Feb 2018 11:01:49 -0800 Subject: [PATCH 173/629] Call png_error if reading fails Allows faster exit from reading corrupt PNG files. PiperOrigin-RevId: 185712829 --- tensorflow/core/lib/png/png_io.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/core/lib/png/png_io.cc b/tensorflow/core/lib/png/png_io.cc index 77a3414442..cba473927d 100644 --- a/tensorflow/core/lib/png/png_io.cc +++ b/tensorflow/core/lib/png/png_io.cc @@ -90,11 +90,8 @@ void WarningHandler(png_structp png_ptr, png_const_charp msg) { void StringReader(png_structp png_ptr, png_bytep data, png_size_t length) { DecodeContext* const ctx = bit_cast(png_get_io_ptr(png_ptr)); if (static_cast(ctx->data_left) < length) { - if (!ctx->error_condition) { - VLOG(1) << "PNG read decoding error"; - ctx->error_condition = true; - } memset(data, 0, length); + png_error(png_ptr, "More bytes requested to read than available"); } else { memcpy(data, ctx->data, length); ctx->data += length; -- GitLab From 6e912b4219f32a283e835f52ae30c9e35e67e62e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 11:05:52 -0800 Subject: [PATCH 174/629] Make the TF Lite test driver more informative. PiperOrigin-RevId: 185713663 --- tensorflow/contrib/lite/testing/tflite_driver.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc index bae639ea95..613223f3d4 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.cc +++ b/tensorflow/contrib/lite/testing/tflite_driver.cc @@ -106,8 +106,8 @@ class TfLiteDriver::Expectation { if (error_is_large) { good_output = false; if (verbose) { - std::cerr << " index " << i << ": " << reference - << " != " << computed << std::endl; + std::cerr << " index " << i << ": got " << computed + << ", but expected " << reference << std::endl; } } } @@ -203,6 +203,10 @@ void TfLiteDriver::SetInput(int id, const string& csv_values) { void TfLiteDriver::SetExpectation(int id, const string& csv_values) { if (!IsValid()) return; auto* tensor = interpreter_->tensor(id); + if (expected_output_.count(id) != 0) { + fprintf(stderr, "Overriden expectation for tensor %d\n", id); + Invalidate("Overriden expectation"); + } expected_output_[id].reset(new Expectation); switch (tensor->type) { case kTfLiteFloat32: -- GitLab From 375e22d57b77e5eeb13b2d878690a2a984ad32dd Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Wed, 14 Feb 2018 11:06:17 -0800 Subject: [PATCH 175/629] Change the title of the compatibility guide document PiperOrigin-RevId: 185713755 --- tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md index 8e5e694a5c..b1bbb7c670 100644 --- a/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/contrib/lite/g3doc/tf_ops_compatibility.md @@ -1,4 +1,4 @@ -# TensorFlow Compatibility Guide +# TensorFlow Lite & TensorFlow Compatibility Guide TensorFlow Lite supports a number of TensorFlow operations used in common inference models. As they are processed by the TensorFlow Lite Optimizing -- GitLab From 3a5968d1aa66b5eea682d378450299cdd46693b9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 11:24:59 -0800 Subject: [PATCH 176/629] Check if ops used in the model are supported by op resolver PiperOrigin-RevId: 185716870 --- tensorflow/contrib/lite/tools/BUILD | 1 + tensorflow/contrib/lite/tools/verifier.cc | 33 +++++++++- tensorflow/contrib/lite/tools/verifier.h | 5 +- .../contrib/lite/tools/verifier_test.cc | 62 ++++++++++++++++--- 4 files changed, 90 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD index 6786b16184..999ccf2ebc 100644 --- a/tensorflow/contrib/lite/tools/BUILD +++ b/tensorflow/contrib/lite/tools/BUILD @@ -112,6 +112,7 @@ cc_test( size = "small", srcs = ["verifier_test.cc"], deps = [ + ":mutable_op_resolver", ":verifier", "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite:schema_fbs_version", diff --git a/tensorflow/contrib/lite/tools/verifier.cc b/tensorflow/contrib/lite/tools/verifier.cc index 726e2aaa31..59c74205f0 100644 --- a/tensorflow/contrib/lite/tools/verifier.cc +++ b/tensorflow/contrib/lite/tools/verifier.cc @@ -155,11 +155,11 @@ bool VerifyTensors(const Model& model, ErrorReporter* error_reporter) { } for (const auto& subgraph : *model.subgraphs()) { if (!subgraph->tensors()) { - return true; + continue; } for (const auto& tensor : *subgraph->tensors()) { if (!tensor->buffer()) { - return true; + continue; } if (tensor->buffer() >= model.buffers()->size()) { ReportError(error_reporter, "Invalid tensor buffer index: %d", @@ -187,9 +187,33 @@ bool VerifyTensors(const Model& model, ErrorReporter* error_reporter) { return true; } +bool VerifyOps(const Model& model, const OpResolver& resolver, + ErrorReporter* error_reporter) { + if (!model.operator_codes()) { + return true; + } + for (const auto& opcode : *model.operator_codes()) { + if (opcode->builtin_code() == BuiltinOperator_CUSTOM) { + if (!resolver.FindOp(opcode->custom_code()->c_str())) { + ReportError(error_reporter, "Unsupported custom op: %s", + opcode->custom_code()->c_str()); + return false; + } + } else { + if (!resolver.FindOp(opcode->builtin_code())) { + ReportError(error_reporter, "Unsupported builtin op: %s", + EnumNameBuiltinOperator(opcode->builtin_code())); + return false; + } + } + } + return true; +} + } // namespace -bool Verify(const void* buf, size_t len, ErrorReporter* error_reporter) { +bool Verify(const void* buf, size_t len, const OpResolver& resolver, + ErrorReporter* error_reporter) { const Model* model = VerifyFlatbufferAndGetModel(buf, len); if (model == nullptr) { ReportError(error_reporter, "Invalid flatbuffer format"); @@ -202,6 +226,9 @@ bool Verify(const void* buf, size_t len, ErrorReporter* error_reporter) { if (!VerifyTensors(*model, error_reporter)) { return false; } + if (!VerifyOps(*model, resolver, error_reporter)) { + return false; + } return true; } } // namespace tflite diff --git a/tensorflow/contrib/lite/tools/verifier.h b/tensorflow/contrib/lite/tools/verifier.h index d2bf3c91d5..c2ee11215c 100644 --- a/tensorflow/contrib/lite/tools/verifier.h +++ b/tensorflow/contrib/lite/tools/verifier.h @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/model.h" namespace tflite { @@ -26,7 +27,9 @@ namespace tflite { // Currently, it verifies: // * The file is following a legit flatbuffer schema. // * The model is in supported version. -bool Verify(const void* buf, size_t len, ErrorReporter* error_reporter); +// * All ops used in the model are supported by OpResolver. +bool Verify(const void* buf, size_t len, const OpResolver& resolver, + ErrorReporter* error_reporter); } // namespace tflite diff --git a/tensorflow/contrib/lite/tools/verifier_test.cc b/tensorflow/contrib/lite/tools/verifier_test.cc index 87f6854e9e..b3e611f999 100644 --- a/tensorflow/contrib/lite/tools/verifier_test.cc +++ b/tensorflow/contrib/lite/tools/verifier_test.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/contrib/lite/error_reporter.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" #include "tensorflow/contrib/lite/testing/util.h" +#include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" #include "tensorflow/contrib/lite/tools/verifier.h" #include "tensorflow/contrib/lite/version.h" #include "tensorflow/core/framework/numeric_types.h" @@ -40,6 +41,19 @@ class TfLiteFlatbufferModelBuilder { CreateBuffer(builder_, builder_.CreateVector(std::vector{}))); } + TfLiteFlatbufferModelBuilder(const std::vector& builtin_ops, + const std::vector& custom_ops) { + buffers_.push_back( + CreateBuffer(builder_, builder_.CreateVector(std::vector{}))); + + for (const auto& iter : builtin_ops) { + resolver_.AddBuiltin(iter, &fake_op_); + } + for (const auto& iter : custom_ops) { + resolver_.AddCustom(iter.data(), &fake_op_); + } + } + void AddTensor(const std::vector& shape, tflite::TensorType type, const std::vector& buffer, const char* name) { int buffer_index = 0; @@ -79,11 +93,13 @@ class TfLiteFlatbufferModelBuilder { bool Verify() { return tflite::Verify(builder_.GetBufferPointer(), builder_.GetSize(), - DefaultErrorReporter()); + resolver_, DefaultErrorReporter()); } private: FlatBufferBuilder builder_; + MutableOpResolver resolver_; + TfLiteRegistration fake_op_; std::vector> operators_; std::vector> operator_codes_; std::vector> tensors_; @@ -98,11 +114,11 @@ TEST(VerifyModel, TestEmptyModel) { ::tflite::FinishModelBuffer(builder, model); ASSERT_TRUE(Verify(builder.GetBufferPointer(), builder.GetSize(), - DefaultErrorReporter())); + MutableOpResolver{}, DefaultErrorReporter())); } TEST(VerifyModel, TestSimpleModel) { - TfLiteFlatbufferModelBuilder builder; + TfLiteFlatbufferModelBuilder builder({}, {"test"}); builder.AddOperator({0, 1}, {2}, BuiltinOperator_CUSTOM, "test"); builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4, 5, 6}, "input"); builder.AddTensor( @@ -116,7 +132,8 @@ TEST(VerifyModel, TestSimpleModel) { TEST(VerifyModel, TestCorruptedData) { std::string model = "123"; - ASSERT_FALSE(Verify(model.data(), model.size(), /*error_reporter=*/nullptr)); + ASSERT_FALSE(Verify(model.data(), model.size(), MutableOpResolver{}, + /*error_reporter=*/nullptr)); } TEST(VerifyModel, TestUnsupportedVersion) { @@ -125,7 +142,7 @@ TEST(VerifyModel, TestUnsupportedVersion) { /*subgraphs=*/0, /*description=*/0, /*buffers=*/0); ::tflite::FinishModelBuffer(builder, model); ASSERT_FALSE(Verify(builder.GetBufferPointer(), builder.GetSize(), - DefaultErrorReporter())); + MutableOpResolver{}, DefaultErrorReporter())); } TEST(VerifyModel, TestRandomModificationIsNotAllowed) { @@ -140,7 +157,7 @@ TEST(VerifyModel, TestRandomModificationIsNotAllowed) { for (int i = 0; i < model_content.size(); i++) { model_content[i] = (model_content[i] + 137) % 255; EXPECT_FALSE(Verify(model_content.data(), model_content.size(), - DefaultErrorReporter())) + MutableOpResolver{}, DefaultErrorReporter())) << "Fail at position: " << i; } } @@ -188,7 +205,7 @@ TEST(VerifyModel, TensorBufferIsNotValid) { ::tflite::FinishModelBuffer(builder, model); ASSERT_FALSE(Verify(builder.GetBufferPointer(), builder.GetSize(), - DefaultErrorReporter())); + MutableOpResolver{}, DefaultErrorReporter())); } TEST(VerifyModel, StringTensorHasInvalidNumString) { @@ -229,6 +246,37 @@ TEST(VerifyModel, StringTensorIsLargerThanRequired) { ASSERT_FALSE(builder.Verify()); } +TEST(VerifyModel, AllOpsAreSupported) { + TfLiteFlatbufferModelBuilder builder({BuiltinOperator_ADD}, {"CustomOp"}); + builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4}, "input1"); + builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4}, "input2"); + builder.AddTensor({2, 3}, TensorType_UINT8, {}, "output"); + builder.AddOperator({0, 1}, {2}, BuiltinOperator_ADD, nullptr); + builder.AddOperator({0, 1}, {2}, BuiltinOperator_CUSTOM, "CustomOp"); + builder.FinishModel({}, {}); + ASSERT_FALSE(builder.Verify()); +} + +TEST(VerifyModel, UseUnsupportedBuiltinOps) { + TfLiteFlatbufferModelBuilder builder({BuiltinOperator_SUB}, {"CustomOp"}); + builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4}, "input1"); + builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4}, "input2"); + builder.AddTensor({2, 3}, TensorType_UINT8, {}, "output"); + builder.AddOperator({0, 1}, {2}, BuiltinOperator_ADD, nullptr); + builder.FinishModel({}, {}); + ASSERT_FALSE(builder.Verify()); +} + +TEST(VerifyModel, UseUnsupportedCustomOps) { + TfLiteFlatbufferModelBuilder builder({BuiltinOperator_ADD}, {"NewOp"}); + builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4}, "input1"); + builder.AddTensor({2, 3}, TensorType_UINT8, {1, 2, 3, 4}, "input2"); + builder.AddTensor({2, 3}, TensorType_UINT8, {}, "output"); + builder.AddOperator({0, 1}, {2}, BuiltinOperator_CUSTOM, "Not supported"); + builder.FinishModel({}, {}); + ASSERT_FALSE(builder.Verify()); +} + // TODO(yichengfan): make up malicious files to test with. } // namespace tflite -- GitLab From eeba54b939a8c3a0da1805a52163f39afb2bd940 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 11:28:29 -0800 Subject: [PATCH 177/629] Minor refactoring: rename the _canonicalization.py files, consistent with the list comprehension transformer. PiperOrigin-RevId: 185717464 --- tensorflow/contrib/py2tf/converters/BUILD | 18 +++++++-------- ...anonicalization.py => break_statements.py} | 0 ...ation_test.py => break_statements_test.py} | 10 ++++---- ...nicalization.py => continue_statements.py} | 0 ...on_test.py => continue_statements_test.py} | 10 ++++---- .../{for_canonicalization.py => for_loops.py} | 0 ...nicalization_test.py => for_loops_test.py} | 6 ++--- tensorflow/contrib/py2tf/impl/BUILD | 1 + tensorflow/contrib/py2tf/impl/conversion.py | 23 +++++++++++++------ 9 files changed, 39 insertions(+), 29 deletions(-) rename tensorflow/contrib/py2tf/converters/{break_canonicalization.py => break_statements.py} (100%) rename tensorflow/contrib/py2tf/converters/{break_canonicalization_test.py => break_statements_test.py} (91%) rename tensorflow/contrib/py2tf/converters/{continue_canonicalization.py => continue_statements.py} (100%) rename tensorflow/contrib/py2tf/converters/{continue_canonicalization_test.py => continue_statements_test.py} (89%) rename tensorflow/contrib/py2tf/converters/{for_canonicalization.py => for_loops.py} (100%) rename tensorflow/contrib/py2tf/converters/{for_canonicalization_test.py => for_loops_test.py} (88%) diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index b6574fc183..93c751b28d 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -18,13 +18,13 @@ py_library( name = "converters", srcs = [ "asserts.py", - "break_canonicalization.py", + "break_statements.py", "builtin_functions.py", "call_trees.py", - "continue_canonicalization.py", + "continue_statements.py", "control_flow.py", "decorators.py", - "for_canonicalization.py", + "for_loops.py", "list_comprehension.py", "logical_expressions.py", "side_effect_guards.py", @@ -64,8 +64,8 @@ py_test( ) py_test( - name = "break_canonicalization_test", - srcs = ["break_canonicalization_test.py"], + name = "break_statements_test", + srcs = ["break_statements_test.py"], srcs_version = "PY2AND3", deps = [ ":test_lib", @@ -97,8 +97,8 @@ py_test( ) py_test( - name = "continue_canonicalization_test", - srcs = ["continue_canonicalization_test.py"], + name = "continue_statements_test", + srcs = ["continue_statements_test.py"], srcs_version = "PY2AND3", deps = [ ":test_lib", @@ -130,8 +130,8 @@ py_test( ) py_test( - name = "for_canonicalization_test", - srcs = ["for_canonicalization_test.py"], + name = "for_loops_test", + srcs = ["for_loops_test.py"], deps = [ ":test_lib", "//tensorflow/contrib/py2tf/pyct", diff --git a/tensorflow/contrib/py2tf/converters/break_canonicalization.py b/tensorflow/contrib/py2tf/converters/break_statements.py similarity index 100% rename from tensorflow/contrib/py2tf/converters/break_canonicalization.py rename to tensorflow/contrib/py2tf/converters/break_statements.py diff --git a/tensorflow/contrib/py2tf/converters/break_canonicalization_test.py b/tensorflow/contrib/py2tf/converters/break_statements_test.py similarity index 91% rename from tensorflow/contrib/py2tf/converters/break_canonicalization_test.py rename to tensorflow/contrib/py2tf/converters/break_statements_test.py index 2243398100..095fcdff07 100644 --- a/tensorflow/contrib/py2tf/converters/break_canonicalization_test.py +++ b/tensorflow/contrib/py2tf/converters/break_statements_test.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for break_canonicalization module.""" +"""Tests for break_statements module.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import break_canonicalization +from tensorflow.contrib.py2tf.converters import break_statements from tensorflow.contrib.py2tf.converters import converter_test_base from tensorflow.python.platform import test @@ -37,7 +37,7 @@ class BreakCanonicalizationTest(converter_test_base.TestCase): return v node = self.parse_and_analyze(test_fn, {}) - node = break_canonicalization.transform(node, self.ctx) + node = break_statements.transform(node, self.ctx) with self.compiled(node) as result: self.assertEqual(test_fn(0), result.test_fn(0)) @@ -69,7 +69,7 @@ class BreakCanonicalizationTest(converter_test_base.TestCase): return v node = self.parse_and_analyze(test_fn, {}) - node = break_canonicalization.transform(node, self.ctx) + node = break_statements.transform(node, self.ctx) with self.compiled(node) as result: # The break is incompletely canonicalized. Everything is in place, but @@ -98,7 +98,7 @@ class BreakCanonicalizationTest(converter_test_base.TestCase): return v, u, w node = self.parse_and_analyze(test_fn, {}) - node = break_canonicalization.transform(node, self.ctx) + node = break_statements.transform(node, self.ctx) with self.compiled(node) as result: self.assertEqual(test_fn(0), result.test_fn(0)) diff --git a/tensorflow/contrib/py2tf/converters/continue_canonicalization.py b/tensorflow/contrib/py2tf/converters/continue_statements.py similarity index 100% rename from tensorflow/contrib/py2tf/converters/continue_canonicalization.py rename to tensorflow/contrib/py2tf/converters/continue_statements.py diff --git a/tensorflow/contrib/py2tf/converters/continue_canonicalization_test.py b/tensorflow/contrib/py2tf/converters/continue_statements_test.py similarity index 89% rename from tensorflow/contrib/py2tf/converters/continue_canonicalization_test.py rename to tensorflow/contrib/py2tf/converters/continue_statements_test.py index 2a0fb2d88b..a598dcd1ae 100644 --- a/tensorflow/contrib/py2tf/converters/continue_canonicalization_test.py +++ b/tensorflow/contrib/py2tf/converters/continue_statements_test.py @@ -12,13 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for continue_canonicalization module.""" +"""Tests for continue_statements module.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.py2tf.converters import continue_canonicalization +from tensorflow.contrib.py2tf.converters import continue_statements from tensorflow.contrib.py2tf.converters import converter_test_base from tensorflow.python.platform import test @@ -37,7 +37,7 @@ class ContinueCanonicalizationTest(converter_test_base.TestCase): return v node = self.parse_and_analyze(test_fn, {}) - node = continue_canonicalization.transform(node, self.ctx) + node = continue_statements.transform(node, self.ctx) with self.compiled(node) as result: self.assertEqual(test_fn(0), result.test_fn(0)) @@ -58,7 +58,7 @@ class ContinueCanonicalizationTest(converter_test_base.TestCase): return v node = self.parse_and_analyze(test_fn, {}) - node = continue_canonicalization.transform(node, self.ctx) + node = continue_statements.transform(node, self.ctx) with self.compiled(node) as result: self.assertEqual(test_fn([]), result.test_fn([])) @@ -84,7 +84,7 @@ class ContinueCanonicalizationTest(converter_test_base.TestCase): return v, u, w node = self.parse_and_analyze(test_fn, {}) - node = continue_canonicalization.transform(node, self.ctx) + node = continue_statements.transform(node, self.ctx) with self.compiled(node) as result: self.assertEqual(test_fn(0), result.test_fn(0)) diff --git a/tensorflow/contrib/py2tf/converters/for_canonicalization.py b/tensorflow/contrib/py2tf/converters/for_loops.py similarity index 100% rename from tensorflow/contrib/py2tf/converters/for_canonicalization.py rename to tensorflow/contrib/py2tf/converters/for_loops.py diff --git a/tensorflow/contrib/py2tf/converters/for_canonicalization_test.py b/tensorflow/contrib/py2tf/converters/for_loops_test.py similarity index 88% rename from tensorflow/contrib/py2tf/converters/for_canonicalization_test.py rename to tensorflow/contrib/py2tf/converters/for_loops_test.py index 910c4dcc00..70a367d3b5 100644 --- a/tensorflow/contrib/py2tf/converters/for_canonicalization_test.py +++ b/tensorflow/contrib/py2tf/converters/for_loops_test.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for for_canonicalization module.""" +"""Tests for for_loops module.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.py2tf.converters import converter_test_base -from tensorflow.contrib.py2tf.converters import for_canonicalization +from tensorflow.contrib.py2tf.converters import for_loops from tensorflow.python.platform import test @@ -34,7 +34,7 @@ class ControlFlowTest(converter_test_base.TestCase): return s node = self.parse_and_analyze(test_fn, {}) - node = for_canonicalization.transform(node, self.ctx) + node = for_loops.transform(node, self.ctx) with self.compiled(node) as result: l = [1, 2, 3] diff --git a/tensorflow/contrib/py2tf/impl/BUILD b/tensorflow/contrib/py2tf/impl/BUILD index f5378917a3..90ffabbc9b 100644 --- a/tensorflow/contrib/py2tf/impl/BUILD +++ b/tensorflow/contrib/py2tf/impl/BUILD @@ -28,6 +28,7 @@ py_library( "//tensorflow/contrib/py2tf/converters", "//tensorflow/contrib/py2tf/pyct", "//tensorflow/contrib/py2tf/pyct/static_analysis", + "//tensorflow/contrib/py2tf/utils", "@gast_archive//:gast", "@six_archive//:six", ], diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index ff4f159975..ca13910ae5 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -21,14 +21,15 @@ from __future__ import print_function import gast import six +from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.converters import asserts -from tensorflow.contrib.py2tf.converters import break_canonicalization +from tensorflow.contrib.py2tf.converters import break_statements from tensorflow.contrib.py2tf.converters import builtin_functions from tensorflow.contrib.py2tf.converters import call_trees -from tensorflow.contrib.py2tf.converters import continue_canonicalization +from tensorflow.contrib.py2tf.converters import continue_statements from tensorflow.contrib.py2tf.converters import control_flow from tensorflow.contrib.py2tf.converters import decorators -from tensorflow.contrib.py2tf.converters import for_canonicalization +from tensorflow.contrib.py2tf.converters import for_loops from tensorflow.contrib.py2tf.converters import logical_expressions from tensorflow.contrib.py2tf.converters import side_effect_guards from tensorflow.contrib.py2tf.impl import config @@ -184,6 +185,13 @@ def function_to_graph(f, conversion_map, arg_values, arg_types, fn = e.cell_contents namespace[fn.__name__] = fn + # Manually add the utils namespace which may be used from generated code. + if 'py2tf_util' not in namespace: + namespace['py2tf_utils'] = utils + elif namespace['py2tf_utils'] != utils: + raise ValueError( + 'The module name py2tf_utils is reserved and may not be used.') + namer = conversion_map.new_namer(namespace) ctx = context.EntityContext( namer=namer, @@ -247,19 +255,20 @@ def node_to_graph(node, ctx, nocompile_decorators): # TODO(mdan): Is it feasible to reconstruct intermediate source code? ctx.source_code = None node = decorators.transform(node, nocompile_decorators) - node = break_canonicalization.transform(node, ctx) + node = break_statements.transform(node, ctx) node = asserts.transform(node, ctx) # Note: sequencing continue canonicalization before for loop one avoids # dealing with the extra loop increment operation that the for # canonicalization creates. - node = continue_canonicalization.transform(node, ctx) + node = continue_statements.transform(node, ctx) ctx.namespace['len'] = len node = _static_analysis_pass(node, ctx) - node = for_canonicalization.transform(node, ctx) - # for_canonicalization may insert new global references. + node = for_loops.transform(node, ctx) + # for_loops may insert new global references. node = builtin_functions.transform(node, ctx) + # TODO(mdan): Kept for CL consistency. Remove. # builtin_functions may insert new global references. ctx.namespace['print'] = print -- GitLab From 71c68c4e9b4b744673ed0b07294c5f0d152686a0 Mon Sep 17 00:00:00 2001 From: Roy Frostig Date: Wed, 14 Feb 2018 12:18:48 -0800 Subject: [PATCH 178/629] [XLA] Add a test for map with static operands in the local Python client. PiperOrigin-RevId: 185725205 --- tensorflow/compiler/xla/python/xla_client_test.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py index 65720c6ef9..7565e8146b 100644 --- a/tensorflow/compiler/xla/python/xla_client_test.py +++ b/tensorflow/compiler/xla/python/xla_client_test.py @@ -881,6 +881,13 @@ class EmbeddedComputationsTest(LocalComputationTest): c.Mul(c.ParameterFromNumpy(NumpyArrayF32(0)), c.ConstantF32Scalar(2.0)) return c.Build() + def _CreateMulF32ByParamComputation(self): + """Computation (f32) -> f32 that multiplies one parameter by the other.""" + c = self._NewComputation("mul_f32_by_param") + c.Mul(c.ParameterFromNumpy(NumpyArrayF32(0)), + c.ParameterFromNumpy(NumpyArrayF32(0))) + return c.Build() + def _CreateMulF64By2Computation(self): """Computation (f64) -> f64 that multiplies its parameter by 2.""" c = self._NewComputation("mul_f64_by2") @@ -1021,6 +1028,14 @@ class EmbeddedComputationsTest(LocalComputationTest): self._CreateBinaryDivF64Computation(), [0]) self._ExecuteAndCompareClose(c, expected=[0.2, 0.4, 0.75, 1.0]) + def DISABLED_testMapWithStaticOperands(self): + c = self._NewComputation() + factor = c.ConstantF32Scalar(3.0) + c.Map([c.Constant(NumpyArrayF32([1.0, 2.0, 3.0, 4.0]))], + self._CreateMulF32ByParamComputation(), [0], + static_operands=[factor]) + self._ExecuteAndCompareClose(c, expected=[3.0, 6.0, 9.0, 12.0]) + def testSelectAndScatterF32(self): c = self._NewComputation() c.SelectAndScatter(c.Constant(NumpyArrayF32([[1., 2., 6.], [4., 5., 3.]])), -- GitLab From 22176cb7af53caa778e4e793172ed4211ed17141 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 14 Feb 2018 12:21:30 -0800 Subject: [PATCH 179/629] Return Status instead of bool in Init(), Flush(), and Close(). With tf.contrib.summary, a remote worker will be writing summaries. If they just LOG the error, then the user doesn't see them. PiperOrigin-RevId: 185725556 --- .../tensorboard/db/summary_file_writer.cc | 11 ++- tensorflow/core/util/events_writer.cc | 90 +++++++++---------- tensorflow/core/util/events_writer.h | 19 ++-- tensorflow/core/util/events_writer_test.cc | 20 ++--- tensorflow/python/client/events_writer.i | 3 + 5 files changed, 72 insertions(+), 71 deletions(-) diff --git a/tensorflow/contrib/tensorboard/db/summary_file_writer.cc b/tensorflow/contrib/tensorboard/db/summary_file_writer.cc index 3868b1172f..85b3e7231b 100644 --- a/tensorflow/contrib/tensorboard/db/summary_file_writer.cc +++ b/tensorflow/contrib/tensorboard/db/summary_file_writer.cc @@ -47,9 +47,9 @@ class SummaryFileWriter : public SummaryWriterInterface { mutex_lock ml(mu_); events_writer_ = tensorflow::MakeUnique(io::JoinPath(logdir, "events")); - if (!events_writer_->InitWithSuffix(filename_suffix)) { - return errors::Unknown("Could not initialize events writer."); - } + TF_RETURN_WITH_CONTEXT_IF_ERROR( + events_writer_->InitWithSuffix(filename_suffix), + "Could not initialize events writer."); last_flush_ = env_->NowMicros(); is_initialized_ = true; return Status::OK(); @@ -151,9 +151,8 @@ class SummaryFileWriter : public SummaryWriterInterface { events_writer_->WriteEvent(*e); } queue_.clear(); - if (!events_writer_->Flush()) { - return errors::InvalidArgument("Could not flush events file."); - } + TF_RETURN_WITH_CONTEXT_IF_ERROR(events_writer_->Flush(), + "Could not flush events file."); last_flush_ = env_->NowMicros(); return Status::OK(); } diff --git a/tensorflow/core/util/events_writer.cc b/tensorflow/core/util/events_writer.cc index 23b00e23dd..49507616ed 100644 --- a/tensorflow/core/util/events_writer.cc +++ b/tensorflow/core/util/events_writer.cc @@ -17,6 +17,7 @@ limitations under the License. #include // for NULL +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -35,10 +36,21 @@ EventsWriter::EventsWriter(const string& file_prefix) file_prefix_(file_prefix), num_outstanding_events_(0) {} -bool EventsWriter::InitIfNeeded() { +EventsWriter::~EventsWriter() { + Close().IgnoreError(); // Autoclose in destructor. +} + +Status EventsWriter::Init() { return InitWithSuffix(""); } + +Status EventsWriter::InitWithSuffix(const string& suffix) { + file_suffix_ = suffix; + return InitIfNeeded(); +} + +Status EventsWriter::InitIfNeeded() { if (recordio_writer_ != nullptr) { CHECK(!filename_.empty()); - if (FileHasDisappeared()) { + if (!FileStillExists().ok()) { // Warn user of data loss and let .reset() below do basic cleanup. if (num_outstanding_events_ > 0) { LOG(WARNING) << "Re-initialization, attempting to open a new file, " @@ -46,7 +58,7 @@ bool EventsWriter::InitIfNeeded() { } } else { // No-op: File is present and writer is initialized. - return true; + return Status::OK(); } } @@ -57,15 +69,12 @@ bool EventsWriter::InitIfNeeded() { static_cast(time_in_seconds), port::Hostname().c_str(), file_suffix_.c_str()); - Status s = env_->NewWritableFile(filename_, &recordio_file_); - if (!s.ok()) { - LOG(ERROR) << "Could not open events file: " << filename_ << ": " << s; - return false; - } + TF_RETURN_WITH_CONTEXT_IF_ERROR( + env_->NewWritableFile(filename_, &recordio_file_), + "Creating writable file ", filename_); recordio_writer_.reset(new io::RecordWriter(recordio_file_.get())); if (recordio_writer_ == nullptr) { - LOG(ERROR) << "Could not create record writer"; - return false; + return errors::Unknown("Could not create record writer"); } num_outstanding_events_ = 0; VLOG(1) << "Successfully opened events file: " << filename_; @@ -77,21 +86,21 @@ bool EventsWriter::InitIfNeeded() { event.set_wall_time(time_in_seconds); event.set_file_version(strings::StrCat(kVersionPrefix, kCurrentVersion)); WriteEvent(event); - Flush(); + TF_RETURN_WITH_CONTEXT_IF_ERROR(Flush(), "Flushing first event."); } - return true; + return Status::OK(); } string EventsWriter::FileName() { if (filename_.empty()) { - InitIfNeeded(); + InitIfNeeded().IgnoreError(); } return filename_; } void EventsWriter::WriteSerializedEvent(StringPiece event_str) { if (recordio_writer_ == nullptr) { - if (!InitIfNeeded()) { + if (!InitIfNeeded().ok()) { LOG(ERROR) << "Write failed because file could not be opened."; return; } @@ -108,60 +117,51 @@ void EventsWriter::WriteEvent(const Event& event) { WriteSerializedEvent(record); } -bool EventsWriter::Flush() { - if (num_outstanding_events_ == 0) return true; +Status EventsWriter::Flush() { + if (num_outstanding_events_ == 0) return Status::OK(); CHECK(recordio_file_ != nullptr) << "Unexpected NULL file"; - if (!recordio_writer_->Flush().ok()) { - LOG(ERROR) << "Failed to flush " << num_outstanding_events_ << " events to " - << filename_; - return false; - } + TF_RETURN_WITH_CONTEXT_IF_ERROR(recordio_writer_->Flush(), "Failed to flush ", + num_outstanding_events_, " to ", filename_); + TF_RETURN_WITH_CONTEXT_IF_ERROR(recordio_file_->Sync(), "Failed to sync ", + num_outstanding_events_, " to ", filename_); - // The FileHasDisappeared() condition is necessary because - // recordio_writer_->Sync() can return true even if the underlying + // The FileStillExists() condition is necessary because + // recordio_writer_->Sync() can return OK even if the underlying // file has been deleted. EventWriter.FileDeletionBeforeWriting // demonstrates this and will fail if the FileHasDisappeared() // condition is removed. // Also, we deliberately attempt to Sync() before checking for a // disappearing file, in case for some file system File::Exists() is // false after File::Open() but before File::Sync(). - if (!recordio_file_->Flush().ok() || !recordio_file_->Sync().ok() || - FileHasDisappeared()) { - LOG(ERROR) << "Failed to flush " << num_outstanding_events_ << " events to " - << filename_; - return false; - } + TF_RETURN_WITH_CONTEXT_IF_ERROR(FileStillExists(), "Failed to flush ", + num_outstanding_events_, " to ", filename_); VLOG(1) << "Wrote " << num_outstanding_events_ << " events to disk."; num_outstanding_events_ = 0; - return true; + return Status::OK(); } -bool EventsWriter::Close() { - bool return_value = Flush(); +Status EventsWriter::Close() { + Status status = Flush(); if (recordio_file_ != nullptr) { - Status s = recordio_file_->Close(); - if (!s.ok()) { - LOG(ERROR) << "Error when closing previous event file: " << filename_ - << ": " << s; - return_value = false; + Status close_status = recordio_file_->Close(); + if (!close_status.ok()) { + status = close_status; } recordio_writer_.reset(nullptr); recordio_file_.reset(nullptr); } num_outstanding_events_ = 0; - return return_value; + return status; } -bool EventsWriter::FileHasDisappeared() { +Status EventsWriter::FileStillExists() { if (env_->FileExists(filename_).ok()) { - return false; - } else { - // This can happen even with non-null recordio_writer_ if some other - // process has removed the file. - LOG(ERROR) << "The events file " << filename_ << " has disappeared."; - return true; + return Status::OK(); } + // This can happen even with non-null recordio_writer_ if some other + // process has removed the file. + return errors::Unknown("The events file ", filename_, " has disappeared."); } } // namespace tensorflow diff --git a/tensorflow/core/util/events_writer.h b/tensorflow/core/util/events_writer.h index a1a8cf790d..5dbaf97af4 100644 --- a/tensorflow/core/util/events_writer.h +++ b/tensorflow/core/util/events_writer.h @@ -18,6 +18,8 @@ limitations under the License. #include #include + +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/io/record_writer.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/macros.h" @@ -43,7 +45,7 @@ class EventsWriter { // Note that it is not recommended to simultaneously have two // EventWriters writing to the same file_prefix. explicit EventsWriter(const string& file_prefix); - ~EventsWriter() { Close(); } // Autoclose in destructor. + ~EventsWriter(); // Sets the event file filename and opens file for writing. If not called by // user, will be invoked automatically by a call to FileName() or Write*(). @@ -51,11 +53,8 @@ class EventsWriter { // and is open this is a no-op. If on the other hand the file was opened, // but has since disappeared (e.g. deleted by another process), this will open // a new file with a new timestamp in its filename. - bool Init() { return InitWithSuffix(""); } - bool InitWithSuffix(const string& suffix) { - file_suffix_ = suffix; - return InitIfNeeded(); - } + Status Init(); + Status InitWithSuffix(const string& suffix); // Returns the filename for the current events file: // filename_ = [file_prefix_].out.events.[timestamp].[hostname][suffix] @@ -77,12 +76,12 @@ class EventsWriter { // be written too. // Close() calls Flush() and then closes the current events file. // Returns true only if both the flush and the closure were successful. - bool Flush(); - bool Close(); + Status Flush(); + Status Close(); private: - bool FileHasDisappeared(); // True if event_file_path_ does not exist. - bool InitIfNeeded(); + Status FileStillExists(); // OK if event_file_path_ exists. + Status InitIfNeeded(); Env* env_; const string file_prefix_; diff --git a/tensorflow/core/util/events_writer_test.cc b/tensorflow/core/util/events_writer_test.cc index a6286ea701..a75b26abc6 100644 --- a/tensorflow/core/util/events_writer_test.cc +++ b/tensorflow/core/util/events_writer_test.cc @@ -112,7 +112,7 @@ TEST(EventWriter, WriteFlush) { string file_prefix = GetDirName("/writeflush_test"); EventsWriter writer(file_prefix); WriteFile(&writer); - EXPECT_TRUE(writer.Flush()); + TF_EXPECT_OK(writer.Flush()); string filename = writer.FileName(); VerifyFile(filename); } @@ -121,7 +121,7 @@ TEST(EventWriter, WriteClose) { string file_prefix = GetDirName("/writeclose_test"); EventsWriter writer(file_prefix); WriteFile(&writer); - EXPECT_TRUE(writer.Close()); + TF_EXPECT_OK(writer.Close()); string filename = writer.FileName(); VerifyFile(filename); } @@ -143,7 +143,7 @@ TEST(EventWriter, FailFlush) { TF_EXPECT_OK(env()->FileExists(filename)); TF_ASSERT_OK(env()->DeleteFile(filename)); EXPECT_EQ(errors::Code::NOT_FOUND, env()->FileExists(filename).code()); - EXPECT_FALSE(writer.Flush()); + EXPECT_FALSE(writer.Flush().ok()); EXPECT_EQ(errors::Code::NOT_FOUND, env()->FileExists(filename).code()); } @@ -155,18 +155,18 @@ TEST(EventWriter, FailClose) { TF_EXPECT_OK(env()->FileExists(filename)); TF_ASSERT_OK(env()->DeleteFile(filename)); EXPECT_EQ(errors::Code::NOT_FOUND, env()->FileExists(filename).code()); - EXPECT_FALSE(writer.Close()); + EXPECT_FALSE(writer.Close().ok()); EXPECT_EQ(errors::Code::NOT_FOUND, env()->FileExists(filename).code()); } TEST(EventWriter, InitWriteClose) { string file_prefix = GetDirName("/initwriteclose_test"); EventsWriter writer(file_prefix); - EXPECT_TRUE(writer.Init()); + TF_EXPECT_OK(writer.Init()); string filename0 = writer.FileName(); TF_EXPECT_OK(env()->FileExists(filename0)); WriteFile(&writer); - EXPECT_TRUE(writer.Close()); + TF_EXPECT_OK(writer.Close()); string filename1 = writer.FileName(); EXPECT_EQ(filename0, filename1); VerifyFile(filename1); @@ -178,7 +178,7 @@ TEST(EventWriter, NameWriteClose) { string filename = writer.FileName(); TF_EXPECT_OK(env()->FileExists(filename)); WriteFile(&writer); - EXPECT_TRUE(writer.Close()); + TF_EXPECT_OK(writer.Close()); VerifyFile(filename); } @@ -186,7 +186,7 @@ TEST(EventWriter, NameClose) { string file_prefix = GetDirName("/nameclose_test"); EventsWriter writer(file_prefix); string filename = writer.FileName(); - EXPECT_TRUE(writer.Close()); + TF_EXPECT_OK(writer.Close()); TF_EXPECT_OK(env()->FileExists(filename)); TF_ASSERT_OK(env()->DeleteFile(filename)); } @@ -199,9 +199,9 @@ TEST(EventWriter, FileDeletionBeforeWriting) { env()->SleepForMicroseconds( 2000000); // To make sure timestamp part of filename will differ. TF_ASSERT_OK(env()->DeleteFile(filename0)); - EXPECT_TRUE(writer.Init()); // Init should reopen file. + TF_EXPECT_OK(writer.Init()); // Init should reopen file. WriteFile(&writer); - EXPECT_TRUE(writer.Flush()); + TF_EXPECT_OK(writer.Flush()); string filename1 = writer.FileName(); EXPECT_NE(filename0, filename1); VerifyFile(filename1); diff --git a/tensorflow/python/client/events_writer.i b/tensorflow/python/client/events_writer.i index de030fcb42..c72b76b8fa 100644 --- a/tensorflow/python/client/events_writer.i +++ b/tensorflow/python/client/events_writer.i @@ -23,6 +23,9 @@ limitations under the License. %nodefaultctor EventsWriter; +%ignore tensorflow::Status::operator=; +%include "tensorflow/core/lib/core/status.h" + %ignoreall %unignore tensorflow; %unignore tensorflow::EventsWriter; -- GitLab From dd5dab7c45e0089af81cfe2955bf3e24a7917771 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 12:24:33 -0800 Subject: [PATCH 180/629] Fixing IdentifyRelu1 transform to recognize min+max in addition to max+min. PiperOrigin-RevId: 185725946 --- .../graph_transformations/identify_relu1.cc | 67 ++++++++++++------- .../reorder_activation_functions.cc | 18 +++++ tensorflow/contrib/lite/toco/tooling_util.cc | 13 +++- tensorflow/contrib/lite/toco/tooling_util.h | 1 + 4 files changed, 72 insertions(+), 27 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc index d36e950609..de6d8889fb 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc @@ -57,45 +57,60 @@ int GetSingleScalarInputIndexOfBinaryOp(Model* model, const Operator* op, } // namespace bool IdentifyRelu1::Run(Model* model, std::size_t op_index) { - const auto maximum_it = model->operators.begin() + op_index; - const auto* maximum_op = maximum_it->get(); - if (maximum_op->type != OperatorType::kTensorFlowMaximum) { + // Follow sequences of min+max and max+min. First get the leading op. + const auto op_it = model->operators.begin() + op_index; + const auto* op_0 = op_it->get(); + if (op_0->type != OperatorType::kTensorFlowMinimum && + op_0->type != OperatorType::kTensorFlowMaximum) { return false; } - CHECK_EQ(maximum_op->inputs.size(), 2); - if (maximum_op->outputs.size() != 1) { - return false; - } - int scalar_input_index = - GetSingleScalarInputIndexOfBinaryOp(model, maximum_op, -1.0f); - if (scalar_input_index == -1) { + + // Get the paired op and ensure it's the counter to the first. + const auto* op_1 = GetOpWithInput(*model, op_0->outputs[0]); + if (!op_1 || + (op_1->type != OperatorType::kTensorFlowMinimum && + op_1->type != OperatorType::kTensorFlowMaximum) || + op_0->type == op_1->type) { return false; } - const auto* minimum_op = GetOpWithInput(*model, maximum_op->outputs[0]); - if (!minimum_op || minimum_op->type != OperatorType::kTensorFlowMinimum) { + + const auto* min_op = + op_0->type == OperatorType::kTensorFlowMinimum ? op_0 : op_1; + const auto* max_op = + op_0->type == OperatorType::kTensorFlowMaximum ? op_0 : op_1; + + CHECK_EQ(min_op->inputs.size(), 2); + CHECK_EQ(max_op->inputs.size(), 2); + if (min_op->outputs.size() != 1 || max_op->outputs.size() != 1) { return false; } - if (GetSingleScalarInputIndexOfBinaryOp(model, minimum_op, 1.0f) == -1) { + + // Get the original input to the min+max pair. + int min_scalar_input_index = + GetSingleScalarInputIndexOfBinaryOp(model, min_op, 1.0f); + int max_scalar_input_index = + GetSingleScalarInputIndexOfBinaryOp(model, max_op, -1.0f); + if (min_scalar_input_index == -1 || max_scalar_input_index == -1) { return false; } - CHECK_EQ(minimum_op->inputs.size(), 2); + int op_0_scalar_input_index = + op_0 == min_op ? min_scalar_input_index : max_scalar_input_index; - // Create and emplace Relu1 node + // Create and emplace Relu1 node. auto* relu1_op = new Relu1Operator; - relu1_op->inputs = {maximum_op->inputs[!scalar_input_index]}; - relu1_op->outputs = minimum_op->outputs; - model->operators.emplace(maximum_it, relu1_op); + relu1_op->inputs = {op_0->inputs[!op_0_scalar_input_index]}; + relu1_op->outputs = op_1->outputs; + model->operators.emplace(op_it, relu1_op); AddMessageF("Creating %s replacing equivalent subgraph", LogName(*relu1_op)); - // Erase Maximum scalar input & operator - model->EraseArray(maximum_op->inputs[scalar_input_index]); - model->operators.erase(FindOperator(model, maximum_op)); - - // Erase Minimum inputs & operator - model->EraseArray(minimum_op->inputs[0]); - model->EraseArray(minimum_op->inputs[1]); - model->operators.erase(FindOperator(model, minimum_op)); + // Erase op scalar inputs & operators. Note that we preserve the non-scalar + // input to the first op as that's been redirected to the relu1_op. + DeleteArrayIfUsedOnce(op_0->inputs[op_0_scalar_input_index], model); + DeleteArrayIfUsedOnce(op_1->inputs[0], model); + DeleteArrayIfUsedOnce(op_1->inputs[1], model); + model->operators.erase(FindOperator(model, op_0)); + model->operators.erase(FindOperator(model, op_1)); return true; } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc index cabbc4d313..30a005c789 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc @@ -62,6 +62,20 @@ bool ReorderActivationFunctions::Run(Model* model, std::size_t op_index) { return false; } + // If the ac_op was originally producing an output_array we can't reorder as + // otherwise the output array would change. It'd be nice to still be able to + // reorder but if code is relying on the fetch names instead of array indices + // this won't work. + for (int i = 0; i < model->flags.output_arrays_size(); ++i) { + if (model->flags.output_arrays(i) == ac_op->outputs[0]) { + AddMessageF( + "Not exchanging activation function with %s to preserve output array " + "name %s", + LogName(*exchange_op), ac_op->outputs[0]); + return false; + } + } + // Rewire by changing inputs, including all consumers. Operator* consumer = GetFirstOpWithInput(*model, ac_op_output); while (consumer) { @@ -75,6 +89,10 @@ bool ReorderActivationFunctions::Run(Model* model, std::size_t op_index) { ac_op->inputs[0] = exchange_op_input; exchange_op->inputs[0] = ac_op_output; + // Clear shapes; this will allow shape propagation to fix the sizes for us. + model->GetOrCreateArray(ac_op->outputs[0]).clear_shape(); + model->GetOrCreateArray(exchange_op->outputs[0]).clear_shape(); + // Finally, reorder operators. Note that this only works when there are no // other direct descendents of the exchange_op. ac_op.swap(exchange_op); diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index ce0fde57f4..a5fed23158 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -110,7 +110,17 @@ int CountOpsWithInput(const Model& model, const string& array_name) { } bool DeleteArrayIfUnused(const string& array_name, Model* model) { - if (CountOpsWithInput(*model, array_name) == 0) { + if (IsDiscardableArray(*model, array_name) && + CountOpsWithInput(*model, array_name) == 0) { + model->EraseArray(array_name); + return true; + } + return false; +} + +bool DeleteArrayIfUsedOnce(const string& array_name, Model* model) { + if (IsDiscardableArray(*model, array_name) && + CountOpsWithInput(*model, array_name) == 1) { model->EraseArray(array_name); return true; } @@ -321,6 +331,7 @@ string HelpfulOperatorTypeName(const Operator& op) { bool OperatorSupportsFusedActivation(OperatorType type) { switch (type) { case OperatorType::kConcatenation: + case OperatorType::kGather: case OperatorType::kSlice: case OperatorType::kSqueeze: case OperatorType::kTensorFlowReshape: diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index 3addccaa10..a2dde09156 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -60,6 +60,7 @@ int CountTrueOutputs(const Model& model, const Operator& op); int CountOpsWithInput(const Model& model, const string& array_name); bool DeleteArrayIfUnused(const string& array_name, Model* model); +bool DeleteArrayIfUsedOnce(const string& array_name, Model* model); std::vector>::const_iterator FindOpWithOutput( const Model& model, const string& array_name); -- GitLab From 25a10f13d88fe13b7cdb0b14f054c65a7a921bfe Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 14 Feb 2018 12:35:44 -0800 Subject: [PATCH 181/629] [TF:XLA] Bump open source llvm revision to r325135 PiperOrigin-RevId: 185727281 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index ae45cf26af..579780208c 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -473,11 +473,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/c6a263bfa2fcdb7b56ce17f650406c3313f5deb6.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/c6a263bfa2fcdb7b56ce17f650406c3313f5deb6.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/ba2e473a530286f386d18a95c9de4d673d4a21dc.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/ba2e473a530286f386d18a95c9de4d673d4a21dc.tar.gz", ], - sha256 = "0fcb58ff87c8ecad770a6db5138425d244fdc7aec710da0ae301c947166c53a9", - strip_prefix = "llvm-c6a263bfa2fcdb7b56ce17f650406c3313f5deb6", + sha256 = "0885a7c01220d2a96aeef4ff9aee016837150af839956d18af1845ea1acd0105", + strip_prefix = "llvm-ba2e473a530286f386d18a95c9de4d673d4a21dc", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From df310954b750ddcba8cc2f64cdc59181b91bcc4d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 12:48:25 -0800 Subject: [PATCH 182/629] Exposing a tensor_list data structure PiperOrigin-RevId: 185729104 --- tensorflow/contrib/py2tf/utils/BUILD | 12 +++ tensorflow/contrib/py2tf/utils/tensor_list.py | 49 ++++++++++ .../contrib/py2tf/utils/tensor_list_test.py | 89 +++++++++++++++++++ 3 files changed, 150 insertions(+) create mode 100644 tensorflow/contrib/py2tf/utils/tensor_list.py create mode 100644 tensorflow/contrib/py2tf/utils/tensor_list_test.py diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index 74853e50f7..a679cb9076 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -24,6 +24,7 @@ py_library( "misc.py", "multiple_dispatch.py", "py_func.py", + "tensor_list.py", "type_check.py", ], srcs_version = "PY2AND3", @@ -83,3 +84,14 @@ py_test( "//tensorflow/python:client_testlib", ], ) + +py_test( + name = "tensor_list_test", + srcs = ["tensor_list_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":utils", + "//tensorflow/python:client_testlib", + "//tensorflow/python:list_ops", + ], +) diff --git a/tensorflow/contrib/py2tf/utils/tensor_list.py b/tensorflow/contrib/py2tf/utils/tensor_list.py new file mode 100644 index 0000000000..b6ff49e2a0 --- /dev/null +++ b/tensorflow/contrib/py2tf/utils/tensor_list.py @@ -0,0 +1,49 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A typed list in Python.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.ops import list_ops + + +class TensorList(object): + """Tensor list wrapper API-compatible with Python built-in list.""" + + def __init__(self, shape, dtype): + self.dtype = dtype + self.shape = shape + self.clear() + + def append(self, value): + self.list_ = list_ops.tensor_list_push_back(self.list_, value) + + def pop(self): + self.list_, value = list_ops.tensor_list_pop_back(self.list_, self.dtype) + return value + + def clear(self): + self.list_ = list_ops.empty_tensor_list(self.shape, self.dtype) + + def count(self): + return list_ops.tensor_list_length(self.list_) + + def __getitem__(self, key): + return list_ops.tensor_list_get_item(self.list_, key, self.dtype) + + def __setitem__(self, key, value): + self.list_ = list_ops.tensor_list_set_item(self.list_, key, value) diff --git a/tensorflow/contrib/py2tf/utils/tensor_list_test.py b/tensorflow/contrib/py2tf/utils/tensor_list_test.py new file mode 100644 index 0000000000..b5e554a162 --- /dev/null +++ b/tensorflow/contrib/py2tf/utils/tensor_list_test.py @@ -0,0 +1,89 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for PyFlow list.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.py2tf.utils import tensor_list as tl +from tensorflow.python.client.session import Session +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.framework.constant_op import constant +from tensorflow.python.platform import test + + +class TensorListTest(test.TestCase): + + def test_list_append_python(self): + with context.eager_mode(): + a = constant(3.0) + l = tl.TensorList(a.shape, a.dtype) + l.append(a) + self.assertEqual(l.count().numpy(), 1) + l.append(a) + self.assertEqual(l.count().numpy(), 2) + _ = l.pop() + self.assertEqual(l.count().numpy(), 1) + a2 = l.pop() + self.assertEqual(l.count().numpy(), 0) + self.assertEqual(a.numpy(), a2.numpy()) + + def test_list_index_python(self): + with context.eager_mode(): + a = constant(3.0) + b = constant(2.0) + l = tl.TensorList(a.shape, a.dtype) + l.append(a) + self.assertEqual(l[0].numpy(), a.numpy()) + l[0] = ops.convert_to_tensor(b) + self.assertEqual(l[0].numpy(), b.numpy()) + + def test_list_append_tf(self): + a = constant(3.0) + l = tl.TensorList(a.shape, a.dtype) + l.append(a) + c1 = l.count() + l.append(a) + c2 = l.count() + _ = l.pop() + c3 = l.count() + a2 = l.pop() + c4 = l.count() + with Session() as sess: + c1, c2, c3, c4, a, a2 = sess.run([c1, c2, c3, c4, a, a2]) + self.assertEqual(c1, 1) + self.assertEqual(c2, 2) + self.assertEqual(c3, 1) + self.assertEqual(c4, 0) + self.assertEqual(a, a2) + + def test_list_index_tf(self): + a = constant(3.0) + b = constant(2.0) + l = tl.TensorList(a.shape, a.dtype) + l.append(a) + l0 = l[0] + l[0] = b + l1 = l[0] + with self.test_session() as sess: + l0, l1, a, b = sess.run([l0, l1, a, b]) + self.assertEqual(l0, a) + self.assertEqual(l1, b) + + +if __name__ == '__main__': + test.main() -- GitLab From eaaf941646ff8b22a6d3ef3689f22ad1b9f7a8e2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 13:12:56 -0800 Subject: [PATCH 183/629] Add the utils module to the uncompiled whitelist. PiperOrigin-RevId: 185733139 --- tensorflow/contrib/py2tf/impl/config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/contrib/py2tf/impl/config.py b/tensorflow/contrib/py2tf/impl/config.py index 8b81d09cb4..7c3ecefff0 100644 --- a/tensorflow/contrib/py2tf/impl/config.py +++ b/tensorflow/contrib/py2tf/impl/config.py @@ -18,6 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.py2tf import utils + + PYTHON_LITERALS = { 'None': None, 'False': False, @@ -27,6 +30,7 @@ PYTHON_LITERALS = { DEFAULT_UNCOMPILED_MODULES = set(( ('tensorflow',), + (utils.__name__,), )) NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',)) -- GitLab From 98c717b782ba402af9c9651dd5fdeb418e6a8e16 Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Wed, 14 Feb 2018 13:16:30 -0800 Subject: [PATCH 184/629] [XLA:python] Plumb method to get program shape / return shape from builder. Will be useful for setting layouts on compile options for AOT local API. PiperOrigin-RevId: 185733615 --- .../xla/client/computation_builder.cc | 20 +++++++++++++++++++ .../compiler/xla/client/computation_builder.h | 3 +++ .../xla/python/local_computation_builder.cc | 5 +++++ .../xla/python/local_computation_builder.h | 3 +++ .../xla/python/local_computation_builder.i | 10 ++++++++++ tensorflow/compiler/xla/python/xla_client.py | 9 +++++++++ .../compiler/xla/python/xla_client_test.py | 3 ++- .../compiler/xla/tests/axpy_simple_test.cc | 4 ++++ 8 files changed, 56 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 46f2ed4836..b1dcad6a49 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -233,6 +233,26 @@ StatusOr> ComputationBuilder::GetShape( return status_or_shape; } +StatusOr ComputationBuilder::GetProgramShape() { + TF_RETURN_IF_ERROR(first_error_); + + GetComputationShapeRequest request; + *request.mutable_computation() = computation_.handle(); + GetComputationShapeResponse response; + + VLOG(2) << "making get-program-shape-request"; + Status status = client_->stub()->GetComputationShape(&request, &response); + VLOG(2) << "done with get-program-shape-request"; + + if (!status.ok()) { + first_error_ = status; + return status; + } + + TF_RET_CHECK(response.has_program_shape()); + return std::move(*response.mutable_program_shape()); +} + ComputationDataHandle ComputationBuilder::CheckShape( const ComputationDataHandle& operand, const Shape& expected_shape) { std::unique_ptr actual_shape = GetShape(operand).ConsumeValueOrDie(); diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index ea4cdb7667..7cae91e9e0 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -101,6 +101,9 @@ class ComputationBuilder { StatusOr> GetShape( const ComputationDataHandle& operand); + // Retrieves the (inferred) result for the current computation's shape. + StatusOr GetProgramShape(); + // Checks that the operand has the given expected shape. Returns the operand // if yes, fails with a CHECK error if no. ComputationDataHandle CheckShape(const ComputationDataHandle& operand, diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index 3b0d837739..a89146d448 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -303,6 +303,11 @@ std::unique_ptr LocalComputationBuilder::GetShape( return builder_.GetShape(operand).ConsumeValueOrDie(); } +StatusOr LocalComputationBuilder::GetReturnValueShape() { + TF_ASSIGN_OR_RETURN(ProgramShape program_shape, builder_.GetProgramShape()); + return program_shape.result(); +} + ComputationDataHandle LocalComputationBuilder::Infeed(const Shape& shape) { return builder_.Infeed(shape); } diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h index 4c6a504f4c..d682204d26 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.h +++ b/tensorflow/compiler/xla/python/local_computation_builder.h @@ -133,6 +133,9 @@ class LocalComputationBuilder { std::unique_ptr GetShape(const ComputationDataHandle& operand); + // Returns the shape of the current return value for the computation. + StatusOr GetReturnValueShape(); + ComputationDataHandle Infeed(const Shape& shape); void Outfeed(const ComputationDataHandle& operand, const Shape& shape, diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index 114754bde4..fa6c8bfa29 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -201,6 +201,15 @@ tensorflow::ImportNumpy(); } } +%typemap(out) StatusOr { + if ($1.ok()) { + $result = numpy::PyShapeInfoFromXlaShape($1.ConsumeValueOrDie()); + } else { + PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); + return NULL; + } +} + %typemap(out) Status { if (!$1.ok()) { PyErr_SetString( @@ -850,6 +859,7 @@ tensorflow::ImportNumpy(); %unignore xla::swig::LocalComputationBuilder::ClearOpMetadata; %unignore xla::swig::LocalComputationBuilder::Parameter; %unignore xla::swig::LocalComputationBuilder::GetShape; +%unignore xla::swig::LocalComputationBuilder::GetReturnValueShape; %unignore xla::swig::LocalComputationBuilder::Infeed; %unignore xla::swig::LocalComputationBuilder::Outfeed; %unignore xla::swig::LocalComputationBuilder::ConstantLiteral; diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index b3b2b1c7ff..2489ad9509 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -195,6 +195,12 @@ class Shape(object): self._minor_to_major = minor_to_major self._check_minor_to_major() + def __eq__(self, other): + # pylint: disable=protected-access + return (self.np_dtype == other.np_dtype and + self._dimensions == other._dimensions and + self._minor_to_major == other._minor_to_major) + def __repr__(self): return ('xla_client.Shape(np_dtype={!r}, dimensions={!r}, ' 'minor_to_major={!r})').format(self.np_dtype, self._dimensions, @@ -606,6 +612,9 @@ class ComputationBuilder(object): def GetShape(self, operand): return _wrap_shape(self._client.GetShape(_unwrap_data_handle(operand))) + def GetReturnValueShape(self): + return _wrap_shape(self._client.GetReturnValueShape()) + def GetComputationStats(self): raise NotImplementedError() diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py index 7565e8146b..c9d09cd5d5 100644 --- a/tensorflow/compiler/xla/python/xla_client_test.py +++ b/tensorflow/compiler/xla/python/xla_client_test.py @@ -86,7 +86,8 @@ class ComputationsWithConstantsTest(LocalComputationTest): def testConstantScalarSumF32(self): c = self._NewComputation() - c.Add(c.ConstantF32Scalar(1.11), c.ConstantF32Scalar(3.14)) + root = c.Add(c.ConstantF32Scalar(1.11), c.ConstantF32Scalar(3.14)) + self.assertEqual(c.GetShape(root), c.GetReturnValueShape()) self._ExecuteAndCompareClose(c, expected=4.25) def testConstantScalarSumF64(self): diff --git a/tensorflow/compiler/xla/tests/axpy_simple_test.cc b/tensorflow/compiler/xla/tests/axpy_simple_test.cc index 627a9c3e7d..3f6fd7c65d 100644 --- a/tensorflow/compiler/xla/tests/axpy_simple_test.cc +++ b/tensorflow/compiler/xla/tests/axpy_simple_test.cc @@ -62,6 +62,10 @@ TEST_F(AxpySimpleTest, AxpyTenValues) { auto ax = builder.Mul(alpha, x); auto axpy = builder.Add(ax, y); + TF_ASSERT_OK_AND_ASSIGN(ProgramShape shape, builder.GetProgramShape()); + + EXPECT_EQ("() -> f32[10]", ShapeUtil::HumanString(shape)); + std::vector expected = { 1.85840735, -1.85840735, 2.28318531, -2.28318531, -6.42477796, 6.42477796, 10.56637061, -10.56637061, -14.70796327, 14.70796327}; -- GitLab From 43fce43944fb29e87a94b4f20c419bb969527ee5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 13:33:37 -0800 Subject: [PATCH 185/629] For Split, pick the 'axis' value from the first input tensor. PiperOrigin-RevId: 185736499 --- .../propagate_fixed_sizes.cc | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 3de251ed70..7cddbadac2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -650,15 +650,34 @@ void ProcessTensorFlowSplitOperator(Model* model, TensorFlowSplitOperator* op) { } const Shape& input_shape = input_array.shape(); - // This code is slightly suspect. The TensorFlow docs say that the axis - // selection defaults to 0, but we are splitting across the final axis. - const int input_dims_count = input_shape.dimensions_count(); - const int input_depth = input_shape.dims(input_dims_count - 1); - CHECK_EQ(input_depth % op->num_split, 0); - const int split_depth = input_depth / op->num_split; + // Yield until axis is constant. + if (!IsConstantParameterArray(*model, op->inputs[0])) { + return; + } + + const auto& axis_array = model->GetArray(op->inputs[0]); + + // Yield until axis dims have been resolved. + if (!axis_array.has_shape()) { + return; + } + + CHECK(axis_array.data_type == ArrayDataType::kInt32) + << "Axis array must be int32."; + CHECK_EQ(RequiredBufferSizeForShape(axis_array.shape()), 1) + << "Axis array must be scalar."; + + int axis = axis_array.GetBuffer().data[0]; + if (axis < 0) { + axis += input_shape.dimensions_count(); + } + + const int split_dim = input_shape.dims(axis); + CHECK_EQ(split_dim % op->num_split, 0); + const int split_depth = split_dim / op->num_split; Shape output_shape = input_shape; - (*output_shape.mutable_dims())[input_dims_count - 1] = split_depth; + (*output_shape.mutable_dims())[axis] = split_depth; CHECK_EQ(op->outputs.size(), op->num_split); for (const auto& output : op->outputs) { -- GitLab From 620dc3f097d047346943c416823f5e370df9fe4b Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 14 Feb 2018 13:48:36 -0800 Subject: [PATCH 186/629] [tf.data] Add usage note to `tf.data.Iterator.get_next()` docstring. Fixes #16954. PiperOrigin-RevId: 185738793 --- tensorflow/python/data/ops/iterator_ops.py | 42 ++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index e573fe0192..4756ec7482 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -44,8 +44,9 @@ GET_NEXT_CALL_WARNING_MESSAGE = ( "This often indicates that `Iterator.get_next()` is being called inside " "a training loop, which will cause gradual slowdown and eventual resource " "exhaustion. If this is the case, restructure your code to call " - "`next_element = iterator.get_next() once outside the loop, and use " - "`next_element` inside the loop.") + "`next_element = iterator.get_next()` once outside the loop, and use " + "`next_element` as the input to some computation that is invoked inside " + "the loop.") @tf_export("data.Iterator") @@ -303,7 +304,42 @@ class Iterator(object): dataset._as_variant_tensor(), self._iterator_resource, name=name) # pylint: disable=protected-access def get_next(self, name=None): - """Returns a nested structure of `tf.Tensor`s containing the next element. + """Returns a nested structure of `tf.Tensor`s representing the next element. + + In graph mode, you should typically call this method *once* and use its + result as the input to another computation. A typical loop will then call + @{tf.Session.run} on the result of that computation. The loop will terminate + when the `Iterator.get_next()` operation raises + @{tf.errors.OutOfRangeError}. The following skeleton shows how to use + this method when building a training loop: + + ```python + dataset = ... # A `tf.data.Dataset` object. + iterator = dataset.make_initializable_iterator() + next_element = iterator.get_next() + + # Build a TensorFlow graph that does something with each element. + loss = model_function(next_element) + optimizer = ... # A `tf.train.Optimizer` object. + train_op = optimizer.minimize(loss) + + with tf.Session() as sess: + try: + while True: + sess.run(train_op) + except tf.errors.OutOfRangeError: + pass + ``` + + NOTE: It is legitimate to call `Iterator.get_next()` multiple times, e.g. + when you are distributing different elements to multiple devices in a single + step. However, a common pitfall arises when users call `Iterator.get_next()` + in each iteration of their training loop. `Iterator.get_next()` adds ops to + the graph, and executing each op allocates resources (including threads); as + a consequence, invoking it in every iteration of a training loop causes + slowdown and eventual resource exhaustion. To guard against this outcome, we + log a warning when the number of uses crosses a fixed threshold of + suspiciousness. Args: name: (Optional.) A name for the created operation. -- GitLab From 21fe8feb34cb4d5b15ce35fc421c7430307facd2 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Wed, 14 Feb 2018 13:51:14 -0800 Subject: [PATCH 187/629] Added C-API based unit tests for GPU and XLA GPU testing. Also refined the API comment for TF_NewSession(). PiperOrigin-RevId: 185739196 --- tensorflow/c/BUILD | 3 +- tensorflow/c/c_api.h | 9 +-- tensorflow/c/c_api_test.cc | 118 ++++++++++++++++++++++++------------ tensorflow/c/c_test_util.cc | 18 ++++-- tensorflow/c/c_test_util.h | 7 +++ 5 files changed, 108 insertions(+), 47 deletions(-) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 25a994be3e..9060c58c13 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -6,6 +6,7 @@ licenses(["notice"]) # Apache 2.0 load( "//tensorflow:tensorflow.bzl", "tf_cc_test", + "tf_cuda_cc_test", "tf_copts", "tf_cuda_library", "tf_custom_op_library", @@ -155,7 +156,7 @@ tf_cuda_library( ], ) -tf_cc_test( +tf_cuda_cc_test( name = "c_api_test", size = "small", srcs = ["c_api_test.cc"], diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index 6d30905a1a..ad592ef709 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -1287,11 +1287,12 @@ TF_CAPI_EXPORT extern void TF_DeleteFunction(TF_Function* func); typedef struct TF_Session TF_Session; -// Return a new execution session with the associated graph, or NULL on error. +// Return a new execution session with the associated graph, or NULL on +// error. Does not take ownership of any input parameters. // -// *graph must be a valid graph (not deleted or nullptr). This function will -// prevent the graph from being deleted until TF_DeleteSession() is called. -// Does not take ownership of opts. +// *`graph` must be a valid graph (not deleted or nullptr). `graph` will be be +// kept alive for the lifetime of the returned TF_Session. New nodes can still +// be added to `graph` after this call. TF_CAPI_EXPORT extern TF_Session* TF_NewSession(TF_Graph* graph, const TF_SessionOptions* opts, TF_Status* status); diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc index 66d1ea8cad..69fe5bec51 100644 --- a/tensorflow/c/c_api_test.cc +++ b/tensorflow/c/c_api_test.cc @@ -57,6 +57,52 @@ static void ExpectHasSubstr(StringPiece s, StringPiece expected) { << "'" << s << "' does not contain '" << expected << "'"; } +// Returns the GPU device name if there is one (with arbitrary tie breaking if +// there are more than one), or "" otherwise. +string GPUDeviceName(TF_Session* session) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + TF_Status* s = status.get(); + std::unique_ptr list( + TF_SessionListDevices(session, s), TF_DeleteDeviceList); + TF_DeviceList* device_list = list.get(); + + CHECK_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + + const int num_devices = TF_DeviceListCount(device_list); + LOG(INFO) << "There are " << num_devices << " devices."; + for (int i = 0; i < num_devices; ++i) { + const char* device_name = TF_DeviceListName(device_list, i, s); + CHECK_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + const char* device_type = TF_DeviceListType(device_list, i, s); + CHECK_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + LOG(INFO) << "Device " << i << " has name " << device_name << ", type " + << device_type; + if (string(device_type) == DEVICE_GPU) { + return device_name; + } + } + // No GPU device found. + return ""; +} + +string GPUDeviceName() { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + TF_Status* s = status.get(); + std::unique_ptr graph(TF_NewGraph(), + TF_DeleteGraph); + + TF_SessionOptions* opts = TF_NewSessionOptions(); + TF_Session* sess = TF_NewSession(graph.get(), opts, s); + TF_DeleteSessionOptions(opts); + + const string gpu_device_name = GPUDeviceName(sess); + TF_DeleteSession(sess, s); + CHECK_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + return gpu_device_name; +} + TEST(CAPI, Version) { EXPECT_STRNE("", TF_Version()); } TEST(CAPI, Status) { @@ -134,6 +180,10 @@ TEST(CAPI, MaybeMove) { } TEST(CAPI, LibraryLoadFunctions) { + // TODO(b/73318067): Fix linking for the GPU test generated by the + // tf_cuda_cc_test() bazel rule and remove the next line. + if (!GPUDeviceName().empty()) return; + // Load the library. TF_Status* status = TF_NewStatus(); TF_Library* lib = @@ -923,7 +973,9 @@ TEST(CAPI, Session) { TF_DeleteStatus(s); } -TEST(CAPI, Session_Min_CPU) { +// If `device` is non-empty, run Min op on that device. +// Otherwise run it on the default device (CPU). +void RunMinTest(const string& device, bool use_XLA) { TF_Status* s = TF_NewStatus(); TF_Graph* graph = TF_NewGraph(); @@ -935,12 +987,14 @@ TEST(CAPI, Session_Min_CPU) { TF_Operation* one = ScalarConst(0, graph, s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); - // Add operation. - TF_Operation* min = Min(feed, one, graph, s); + // Create a session for this graph. + CSession csession(graph, s, use_XLA); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); - // Create a session for this graph. - CSession csession(graph, s); + if (!device.empty()) { + LOG(INFO) << "Setting op Min on device " << device; + } + TF_Operation* min = MinWithDevice(feed, one, graph, device, s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); // Run the graph. @@ -963,44 +1017,24 @@ TEST(CAPI, Session_Min_CPU) { TF_DeleteStatus(s); } -TEST(CAPI, Session_Min_XLA_CPU) { - TF_Status* s = TF_NewStatus(); - TF_Graph* graph = TF_NewGraph(); - - // Make a placeholder operation. - TF_Operation* feed = Placeholder(graph, s); - ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); +TEST(CAPI, Session_Min_CPU) { RunMinTest(/*device=*/"", /*use_XLA=*/false); } - // Make a constant operation with the scalar "0", for axis. - TF_Operation* one = ScalarConst(0, graph, s); - ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); +TEST(CAPI, Session_Min_XLA_CPU) { RunMinTest(/*device=*/"", /*use_XLA=*/true); } - // Add operation. - TF_Operation* min = Min(feed, one, graph, s); - ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); +TEST(CAPI, Session_Min_GPU) { + const string gpu_device = GPUDeviceName(); + // Skip this test if no GPU is available. + if (gpu_device.empty()) return; - // Create a session for this graph. - CSession csession(graph, s, /*use_XLA=*/true); - ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + RunMinTest(gpu_device, /*use_XLA=*/false); +} - // Run the graph. - csession.SetInputs({{feed, Int32Tensor({3, 2, 5})}}); - csession.SetOutputs({min}); - csession.Run(s); - ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); - TF_Tensor* out = csession.output_tensor(0); - ASSERT_TRUE(out != nullptr); - EXPECT_EQ(TF_INT32, TF_TensorType(out)); - EXPECT_EQ(0, TF_NumDims(out)); // scalar - ASSERT_EQ(sizeof(int32), TF_TensorByteSize(out)); - int32* output_contents = static_cast(TF_TensorData(out)); - EXPECT_EQ(2, *output_contents); +TEST(CAPI, Session_Min_XLA_GPU) { + const string gpu_device = GPUDeviceName(); + // Skip this test if no GPU is available. + if (gpu_device.empty()) return; - // Clean up - csession.CloseAndDelete(s); - ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); - TF_DeleteGraph(graph); - TF_DeleteStatus(s); + RunMinTest(gpu_device, /*use_XLA=*/true); } TEST(CAPI, SessionPRun) { @@ -2145,6 +2179,10 @@ TEST_F(CApiAttributesTest, Errors) { } TEST(TestApiDef, TestCreateApiDef) { + // TODO(b/73318067): Fix linking for the GPU test generated by the + // tf_cuda_cc_test() bazel rule and remove the next line. + if (!GPUDeviceName().empty()) return; + TF_Status* status = TF_NewStatus(); TF_Library* lib = TF_LoadLibrary("tensorflow/c/test_op.so", status); @@ -2175,6 +2213,10 @@ TEST(TestApiDef, TestCreateApiDef) { } TEST(TestApiDef, TestCreateApiDefWithOverwrites) { + // TODO(b/73318067): Fix linking for the GPU test generated by the + // tf_cuda_cc_test() bazel rule and remove the next line. + if (!GPUDeviceName().empty()) return; + TF_Status* status = TF_NewStatus(); TF_Library* lib = TF_LoadLibrary("tensorflow/c/test_op.so", status); diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc index 2c5f08d672..a55af46ae2 100644 --- a/tensorflow/c/c_test_util.cc +++ b/tensorflow/c/c_test_util.cc @@ -163,10 +163,14 @@ TF_Operation* AddWithCtrlDependency(TF_Operation* l, TF_Operation* r, return TF_FinishOperation(desc, s); } +// If `op_device` is non-empty, set the created op on that device. void BinaryOpHelper(const char* op_name, TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name, - TF_Operation** op, bool check) { + TF_Operation** op, const string& op_device, bool check) { TF_OperationDescription* desc = TF_NewOperation(graph, op_name, name); + if (!op_device.empty()) { + TF_SetDevice(desc, op_device.c_str()); + } TF_AddInput(desc, {l, 0}); TF_AddInput(desc, {r, 0}); *op = TF_FinishOperation(desc, s); @@ -176,13 +180,19 @@ void BinaryOpHelper(const char* op_name, TF_Operation* l, TF_Operation* r, } } -TF_Operation* Min(TF_Operation* l, TF_Operation* r, TF_Graph* graph, - TF_Status* s, const char* name) { +TF_Operation* MinWithDevice(TF_Operation* l, TF_Operation* r, TF_Graph* graph, + const string& op_device, TF_Status* s, + const char* name) { TF_Operation* op; - BinaryOpHelper("Min", l, r, graph, s, name, &op, true); + BinaryOpHelper("Min", l, r, graph, s, name, &op, op_device, true); return op; } +TF_Operation* Min(TF_Operation* l, TF_Operation* r, TF_Graph* graph, + TF_Status* s, const char* name) { + return MinWithDevice(l, r, graph, /*op_device=*/"", s, name); +} + TF_Operation* Add(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s, const char* name) { TF_OperationDescription* desc = TF_NewOperation(graph, "AddN", name); diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h index 805fafae05..2a70177c72 100644 --- a/tensorflow/c/c_test_util.h +++ b/tensorflow/c/c_test_util.h @@ -72,6 +72,11 @@ TF_Operation* Add(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s, TF_Operation* Min(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name = "min"); +// If `op_device` is non-empty, set the created op on that device. +TF_Operation* MinWithDevice(TF_Operation* l, TF_Operation* r, TF_Graph* graph, + const string& op_device, TF_Status* s, + const char* name = "min"); + TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s, const char* name = "neg"); @@ -127,6 +132,8 @@ class CSession { TF_Tensor* output_tensor(int i) { return output_values_[i]; } + TF_Session* mutable_session() { return session_; } + private: void DeleteInputValues(); void ResetOutputValues(); -- GitLab From c5c0ec07321e4911d803ba8aa9a1f4049a88710f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 13:51:23 -0800 Subject: [PATCH 188/629] Fixes MovingAverageOptimizer when dealing with resource variables. Also make it an error if user tries to use swapping_saver with some variables but do not include their EMA counterparts. PiperOrigin-RevId: 185739214 --- .../training/moving_average_optimizer.py | 47 ++++++++---- .../training/moving_average_optimizer_test.py | 72 +++++++++++++++++-- 2 files changed, 100 insertions(+), 19 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py index d68ad23d65..9ce50bfe10 100644 --- a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py +++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py @@ -83,7 +83,7 @@ class MovingAverageOptimizer(optimizer.Optimizer): self._optimizer = opt self._ema = moving_averages.ExponentialMovingAverage( average_decay, num_updates=num_updates) - self._variable_map = None + self._swapped_variable_name_map = None self._sequential_update = sequential_update def compute_gradients(self, *args, **kwargs): @@ -93,7 +93,7 @@ class MovingAverageOptimizer(optimizer.Optimizer): train_op = self._optimizer.apply_gradients( grads_and_vars, global_step=global_step, name=name) var_list = [x[1] for x in grads_and_vars if x[0] is not None] - self._variable_map = {} + self._swapped_variable_name_map = {} if self._sequential_update: with ops.control_dependencies([train_op]): ma_op = self._ema.apply(var_list) @@ -102,9 +102,9 @@ class MovingAverageOptimizer(optimizer.Optimizer): for v in var_list: v_avg = self._ema.average(v) - self._variable_map[v.op.name] = v_avg - self._variable_map[v_avg.op.name] = v - return control_flow_ops.group(train_op, ma_op, name="train_with_avg") + self._swapped_variable_name_map[v.op.name] = v_avg.op.name + self._swapped_variable_name_map[v_avg.op.name] = v.op.name + return control_flow_ops.group(train_op, ma_op, name='train_with_avg') def swapping_saver(self, var_list=None, name='swapping_saver', **kwargs): """Create a saver swapping moving averages and variables. @@ -129,22 +129,45 @@ class MovingAverageOptimizer(optimizer.Optimizer): Raises: RuntimeError: If apply_gradients or minimize has not been called before. + ValueError: If var_list is provided and contains some variables but not + their moving average counterpart. """ - if self._variable_map is None: + if self._swapped_variable_name_map is None: raise RuntimeError('Must call apply_gradients or minimize before ' 'creating the swapping_saver') if var_list is None: var_list = variables.global_variables() if not isinstance(var_list, dict): var_list = saver.BaseSaverBuilder.OpListToDict(var_list) + + # OpListToDict converts variables to tensors. We make sure we can get + # the unique variable name for normal and resource vaiables. + def get_v_name(tensor): + if tensor.op.type == 'ReadVariableOp': + return tensor.op.inputs[0].op.name + else: + return tensor.op.name + + v_name_to_tensor = {} + for tensor in six.itervalues(var_list): + v_name = get_v_name(tensor) + v_name_to_tensor[v_name] = tensor + # Now swap variables and moving averages swapped_var_list = {} - for k, v in six.iteritems(var_list): - v_swap = self._variable_map.get(v.op.name, None) - if v_swap: - swapped_var_list[k] = v_swap - else: - swapped_var_list[k] = v + for k, tensor in six.iteritems(var_list): + v_name = get_v_name(tensor) + swapped_v_name = self._swapped_variable_name_map.get(v_name, None) + tensor_to_save = tensor + if swapped_v_name is not None: + if swapped_v_name in v_name_to_tensor: + tensor_to_save = v_name_to_tensor[swapped_v_name] + else: + raise ValueError( + ('Variable to swap %s is not part of variables to save. ' + 'This breaks MovingAverageOptimizer.') % swapped_v_name) + swapped_var_list[k] = tensor_to_save + # Build the swapping saver. return saver.Saver(swapped_var_list, name=name, **kwargs) diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py index 60929add19..85e3e8d379 100644 --- a/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer_test.py @@ -24,6 +24,10 @@ import six from tensorflow.contrib.opt.python.training import moving_average_optimizer from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import gradient_descent @@ -33,13 +37,26 @@ from tensorflow.python.training import saver class MovingAverageOptimizerTest(test.TestCase): def testRun(self): + self._helpTestRun(use_resource=False) + + def testRunUseResource(self): + # Test that MovingAverageOptimizer works with resource variables. + self._helpTestRun(use_resource=True) + + def _helpTestRun(self, use_resource=False): for sequential_update in [True, False]: for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session() as sess: + with self.test_session(graph=ops.Graph()) as sess: orig_val0 = [1.0, 2.0] orig_val1 = [3.0, 4.0] - var0 = variables.Variable(orig_val0, name='var0', dtype=dtype) - var1 = variables.Variable(orig_val1, name='var1', dtype=dtype) + var0 = variable_scope.get_variable( + 'var0', + initializer=constant_op.constant(orig_val0, dtype=dtype), + use_resource=use_resource) + var1 = variable_scope.get_variable( + 'var1', + initializer=constant_op.constant(orig_val1, dtype=dtype), + use_resource=use_resource) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) @@ -52,22 +69,63 @@ class MovingAverageOptimizerTest(test.TestCase): save_path = os.path.join(save_dir, 'model') update = opt.apply_gradients( list(six.moves.zip([grads0, grads1], [var0, var1]))) + global_vars = variables.global_variables() + ema_var0 = [ + v for v in global_vars + if v.op.name == 'var0/ExponentialMovingAverage' + ][0] + ema_var1 = [ + v for v in global_vars + if v.op.name == 'var1/ExponentialMovingAverage' + ][0] + perturb = control_flow_ops.group([ + state_ops.assign_add(var0, [1.0, 1.0]), + state_ops.assign_add(var1, [2.0, 2.0]), + state_ops.assign_add(ema_var0, [3.0, 3.0]), + state_ops.assign_add(ema_var1, [4.0, 4.0]) + ]) + + # Test taht saver with missing ema variables will fail. + with self.assertRaisesRegexp(ValueError, r'Variable to swap'): + opt.swapping_saver(var_list=[var0]) + train_saver = opt.swapping_saver() + train_saver_subset = opt.swapping_saver(var_list=[var0, ema_var0]) inference_saver = saver.Saver() variables.global_variables_initializer().run() # Step 1. update.run() - val0 = var0.eval() - val1 = var1.eval() self.assertAllCloseAccordingToType([0.8, 1.8], var0.eval()) self.assertAllCloseAccordingToType([2.98, 3.98], var1.eval()) + if sequential_update: + self.assertAllCloseAccordingToType([0.9, 1.9], ema_var0.eval()) + self.assertAllCloseAccordingToType([2.99, 3.99], ema_var1.eval()) # Test that the swapping saver save/restore operation is identity. train_saver.save(sess, save_path) train_saver.restore(sess, save_path) - val0 = var0.eval() - val1 = var1.eval() self.assertAllCloseAccordingToType([0.8, 1.8], var0.eval()) self.assertAllCloseAccordingToType([2.98, 3.98], var1.eval()) + if sequential_update: + self.assertAllCloseAccordingToType([0.9, 1.9], ema_var0.eval()) + self.assertAllCloseAccordingToType([2.99, 3.99], ema_var1.eval()) + # Test that the subset saver saves the EMA variable as well. + if sequential_update: + subset_save_path = save_path + '_subset' + train_saver_subset.save(sess, subset_save_path) + perturb.run() + self.assertAllCloseAccordingToType([1.8, 2.8], var0.eval()) + self.assertAllCloseAccordingToType([3.9, 4.9], ema_var0.eval()) + self.assertAllCloseAccordingToType([4.98, 5.98], var1.eval()) + self.assertAllCloseAccordingToType([6.99, 7.99], ema_var1.eval()) + # Restoring should only restore var0 and ema_var0. + train_saver_subset.restore(sess, subset_save_path) + self.assertAllCloseAccordingToType([0.8, 1.8], var0.eval()) + self.assertAllCloseAccordingToType([0.9, 1.9], ema_var0.eval()) + self.assertAllCloseAccordingToType([4.98, 5.98], var1.eval()) + self.assertAllCloseAccordingToType([6.99, 7.99], ema_var1.eval()) + # Restore back to previou state. + train_saver.restore(sess, save_path) + # If updates are parallel, this is not always true after the 1st step. if sequential_update: # Test that the normal saver will have the averaged variables. -- GitLab From 04348511079ffee7cb169bb3bef42a47ec1736c6 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 14 Feb 2018 13:52:18 -0800 Subject: [PATCH 189/629] [XLA] Add reproducer that shows perf issues in HloDataflowAnalysis::UpdateTupleValueSet, then optimize that method. HloDataflowAnalysis::UpdateTupleValueSet starts to show up in profiles for while bodies that have many GetTupleElement nodes. Use a set to keep track of which HloInstructions we need to propagate DFA for. PiperOrigin-RevId: 185739365 --- .../xla/service/copy_insertion_test.cc | 50 +++++++++++++++++++ .../xla/service/hlo_dataflow_analysis.cc | 28 +++++++---- 2 files changed, 68 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc index 128ee726ea..153f062d01 100644 --- a/tensorflow/compiler/xla/service/copy_insertion_test.cc +++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc @@ -1724,8 +1724,58 @@ void BM_ParallelWhiles(int num_iters, int num_whiles) { } } +std::unique_ptr MakeBenchmarkWhileBody( + const int num_tuple_inputs) { + auto builder = HloComputation::Builder("benchmark_loop_body"); + const Shape element_shape = ShapeUtil::MakeShape(F32, {}); + std::vector input_shape(num_tuple_inputs, element_shape); + const Shape loop_state_shape = ShapeUtil::MakeTupleShape(input_shape); + HloInstruction* param = builder.AddInstruction( + HloInstruction::CreateParameter(0, loop_state_shape, "loop_state")); + std::vector gte_nodes(num_tuple_inputs); + for (int i = 0; i < num_tuple_inputs; ++i) { + gte_nodes[i] = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(element_shape, param, i)); + } + builder.AddInstruction(HloInstruction::CreateTuple(gte_nodes)); + return builder.Build(); +} + +void BM_ManyElementTuple(int num_iters, const int num_tuple_inputs) { + tensorflow::testing::StopTiming(); + HloModuleConfig config; + config.set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); + CopyInsertion copy_insertion; + const Shape element_shape = ShapeUtil::MakeShape(F32, {}); + std::vector tuple_params(num_tuple_inputs); + for (int i = 0; i < num_iters; ++i) { + auto builder = HloComputation::Builder("BM_ParallelWhiles"); + HloModule module("BM_ManyElementTuple", VersionedComputationHandle(), + config); + for (int j = 0; j < num_tuple_inputs; ++j) { + tuple_params[j] = builder.AddInstruction( + HloInstruction::CreateParameter(j, element_shape, "")); + } + HloInstruction* init = + builder.AddInstruction(HloInstruction::CreateTuple(tuple_params)); + HloComputation* condition = + module.AddEmbeddedComputation(MakeTrivialCondition(init->shape())); + HloComputation* body = + module.AddEmbeddedComputation(MakeBenchmarkWhileBody(num_tuple_inputs)); + HloInstruction* xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(init->shape(), condition, body, init)); + builder.AddInstruction(HloInstruction::CreateGetTupleElement( + ShapeUtil::MakeShape(F32, {}), xla_while, 0)); + module.AddEntryComputation(builder.Build()); + tensorflow::testing::StartTiming(); + ASSERT_IS_OK(copy_insertion.Run(&module).status()); + tensorflow::testing::StopTiming(); + } +} + BENCHMARK(BM_SequentialWhiles)->Arg(512)->Arg(1024)->Arg(2048)->Arg(4096); BENCHMARK(BM_ParallelWhiles)->Arg(512)->Arg(1024)->Arg(2048)->Arg(4096); +BENCHMARK(BM_ManyElementTuple)->Arg(1024)->Arg(12288); TEST_F(CopyInsertionTest, SimpleControlFlowTest) { const string& hlo_string = R"( diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index d25fc5d741..ccbbe8f196 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -585,16 +585,23 @@ bool HloDataflowAnalysis::UpdateInstructionValueSet( void HloDataflowAnalysis::Propagate() { std::queue worklist; + tensorflow::gtl::FlatSet workset; + auto add_to_worklist = [&worklist, &workset](HloInstruction* instruction) { + if (workset.insert(instruction).second) { + worklist.push(instruction); + } + }; for (HloComputation* computation : module_->computations()) { for (HloInstruction* instruction : computation->instructions()) { - worklist.push(instruction); + add_to_worklist(instruction); } } while (!worklist.empty()) { HloInstruction* instruction = worklist.front(); worklist.pop(); + workset.erase(workset.find(instruction)); VLOG(3) << "Worklist top: " << instruction->name(); VLOG(3) << ToString(); @@ -608,9 +615,10 @@ void HloDataflowAnalysis::Propagate() { VLOG(4) << "New value set for " << instruction->name() << ": " << GetInstructionValueSet(instruction); - // Instruction value was updated. Add users to work list. + // Instruction value was updated. Add users to work list if we haven't + // already. for (HloInstruction* user : instruction->users()) { - worklist.push(user); + add_to_worklist(user); // If user sequentially calls a computation, then the respective // parameter(s) of the computation need to be updated. @@ -625,10 +633,10 @@ void HloDataflowAnalysis::Propagate() { // Note that the same instruction can be used in both operand 1 and // operand 2. if (user->operand(1) == instruction) { - worklist.push(user->true_computation()->parameter_instruction(0)); + add_to_worklist(user->true_computation()->parameter_instruction(0)); } if (user->operand(2) == instruction) { - worklist.push(user->false_computation()->parameter_instruction(0)); + add_to_worklist(user->false_computation()->parameter_instruction(0)); } } else { for (HloComputation* called_computation : user->called_computations()) { @@ -636,7 +644,7 @@ void HloDataflowAnalysis::Propagate() { call_graph_->GetNode(called_computation); if (call_graph_node.context() == CallContext::kSequential) { for (int64 operand_number : user->OperandIndices(instruction)) { - worklist.push( + add_to_worklist( called_computation->parameter_instruction(operand_number)); } } @@ -652,13 +660,13 @@ void HloDataflowAnalysis::Propagate() { for (const CallSite& callsite : call_graph_node.caller_callsites()) { if ((callsite.instruction()->opcode() == HloOpcode::kCall) || (callsite.instruction()->opcode() == HloOpcode::kConditional)) { - worklist.push(callsite.instruction()); + add_to_worklist(callsite.instruction()); } else if (callsite.instruction()->opcode() == HloOpcode::kWhile) { // Add the while itself, and the body and condition parameters. - worklist.push(callsite.instruction()); - worklist.push( + add_to_worklist(callsite.instruction()); + add_to_worklist( callsite.instruction()->while_body()->parameter_instruction(0)); - worklist.push( + add_to_worklist( callsite.instruction()->while_condition()->parameter_instruction( 0)); } -- GitLab From dd7d9486ae90b4b65e896c1dd068b3c09c3b53b8 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 14 Feb 2018 13:57:29 -0800 Subject: [PATCH 190/629] set_shape fix in _UnreadVariable PiperOrigin-RevId: 185740099 --- tensorflow/python/ops/resource_variable_ops.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 62c65c1dcf..8d6a4df6bf 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -933,6 +933,9 @@ class _UnreadVariable(ResourceVariable): return gen_resource_variable_ops.read_variable_op(self._handle, self._dtype) + def set_shape(self, shape): + self._shape = shape + @property def op(self): """The op for this variable.""" -- GitLab From 90159c53b83527bd088452769ea4e1b98667860c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 14:40:29 -0800 Subject: [PATCH 191/629] Supports op exp (tf.exp) in Toco and Tensorflow Lite. PiperOrigin-RevId: 185747281 --- tensorflow/contrib/lite/kernels/BUILD | 13 ++ tensorflow/contrib/lite/kernels/exp.cc | 92 +++++++++ tensorflow/contrib/lite/kernels/exp_test.cc | 70 +++++++ .../internal/reference/reference_ops.h | 8 + tensorflow/contrib/lite/kernels/register.cc | 2 + tensorflow/contrib/lite/model.cc | 1 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 5 + .../contrib/lite/schema/schema_generated.h | 185 ++++++++++++++++-- tensorflow/contrib/lite/testing/BUILD | 1 + .../contrib/lite/testing/generate_examples.py | 28 +++ .../testing/generated_examples_zip_test.cc | 1 + .../propagate_fixed_sizes.cc | 1 + .../contrib/lite/toco/import_tensorflow.cc | 13 ++ tensorflow/contrib/lite/toco/model.h | 12 ++ .../contrib/lite/toco/tflite/operator.cc | 1 + .../contrib/lite/toco/tflite/operator_test.cc | 1 + tensorflow/contrib/lite/toco/tooling_util.cc | 1 + 18 files changed, 415 insertions(+), 21 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/exp.cc create mode 100644 tensorflow/contrib/lite/kernels/exp_test.cc diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index a8ef0daede..5d553def0a 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -111,6 +111,7 @@ cc_library( "div.cc", "embedding_lookup.cc", "embedding_lookup_sparse.cc", + "exp.cc", "fully_connected.cc", "gather.cc", "hashtable_lookup.cc", @@ -327,6 +328,18 @@ tf_cc_test( ], ) +tf_cc_test( + name = "exp_test", + size = "small", + srcs = ["exp_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "mean_test", size = "small", diff --git a/tensorflow/contrib/lite/kernels/exp.cc b/tensorflow/contrib/lite/kernels/exp.cc new file mode 100644 index 0000000000..a9e79b742d --- /dev/null +++ b/tensorflow/contrib/lite/kernels/exp.cc @@ -0,0 +1,92 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace exp { + +// This file has reference implementation of Exp. +enum KernelType { + kReference, +}; + +struct ExpContext { + ExpContext(TfLiteContext* context, TfLiteNode* node) { + input = GetInput(context, node, 0); + output = GetOutput(context, node, 0); + } + TfLiteTensor* input; + TfLiteTensor* output; +}; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + ExpContext op_context(context, node); + TfLiteIntArray* output_dims = TfLiteIntArrayCopy(op_context.input->dims); + op_context.output->type = op_context.input->type; + return context->ResizeTensor(context, op_context.output, output_dims); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + ExpContext op_context(context, node); + +#define TF_LITE_EXP(kernel_type, data_type) \ + kernel_type::Exp(GetTensorData(op_context.input), \ + NumElements(op_context.input), \ + GetTensorData(op_context.output)) + + // TODO(kanlig): supports half, bfloat16, float64, complex64, and complex128. + if (kernel_type == kReference) { + switch (op_context.input->type) { + case kTfLiteFloat32: + TF_LITE_EXP(reference_ops, float); + break; + default: + context->ReportError(context, + "Type %d is currently not supported by Exp.", + op_context.input->type); + return kTfLiteError; + } + } +#undef TF_LITE_EXP + return kTfLiteOk; +} + +} // namespace exp + +TfLiteRegistration* Register_EXP_REF() { + static TfLiteRegistration r = {nullptr, nullptr, exp::Prepare, + exp::Eval}; + return &r; +} + +// TODO(kanlig): add optimized implementation of Exp. +TfLiteRegistration* Register_EXP() { return Register_EXP_REF(); } + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/exp_test.cc b/tensorflow/contrib/lite/kernels/exp_test.cc new file mode 100644 index 0000000000..eed67369a1 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/exp_test.cc @@ -0,0 +1,70 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class ExpOpModel : public SingleOpModel { + public: + ExpOpModel(const TensorData& input, const TensorType& output) { + input_ = AddInput(input); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_EXP, BuiltinOptions_ExpOptions, + CreateExpOptions(builder_).Union()); + BuildInterpreter({GetShape(input_)}); + } + + template + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + template + std::vector GetOutput() { + return ExtractVector(output_); + } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + protected: + int input_; + int output_; +}; + +TEST(ExpOpTest, FloatTest) { + std::initializer_list data = {1.0, 0.0, -1.0, 1.0, 1.0, -1.0}; + ExpOpModel m({TensorType_FLOAT32, {3, 1, 2}}, TensorType_FLOAT32); + m.SetInput(data); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({3, 1, 2})); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + {2.71828, 1, 0.367879, 2.71828, 2.71828, 0.367879}))); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index f18543f4e4..2e0376656a 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -2762,6 +2762,14 @@ inline void Slice(const T* input_data, const Dims<4>& input_dims, } } +template +inline void Exp(const T* input_data, const size_t num_elements, + T* output_data) { + for (size_t idx = 0; idx < num_elements; ++idx) { + output_data[idx] = exp(input_data[idx]); + } +} + template inline void Mean(T* input_data, const int* input_dims, const int input_num_dims, T* output_data, const int* output_dims, diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 1fb779fd51..0f365078cd 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -60,6 +60,7 @@ TfLiteRegistration* Register_TRANSPOSE(); TfLiteRegistration* Register_MEAN(); TfLiteRegistration* Register_SQUEEZE(); TfLiteRegistration* Register_STRIDED_SLICE(); +TfLiteRegistration* Register_EXP(); BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_RELU, Register_RELU()); @@ -108,6 +109,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_SUB, Register_SUB()); AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE()); AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE()); + AddBuiltin(BuiltinOperator_EXP, Register_EXP()); } TfLiteRegistration* BuiltinOpResolver::FindOp( diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 14b6709964..2ee0cac11c 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -278,6 +278,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_RELU_N1_TO_1: case BuiltinOperator_RELU6: case BuiltinOperator_CONCAT_EMBEDDINGS: + case BuiltinOperator_EXP: break; case BuiltinOperator_LSH_PROJECTION: { TfLiteLSHProjectionParams* params = diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index da9ceec2f1..77084b4dc8 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -341,6 +341,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_SUB: case tflite::BuiltinOperator_SQUEEZE: case tflite::BuiltinOperator_STRIDED_SLICE: + case tflite::BuiltinOperator_EXP: FATAL("Op code %d is currently not delegated to NNAPI", builtin); nn_op_type = -1; // set to invalid break; diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 36cc2724eb..ef8f39cf55 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -120,6 +120,7 @@ enum BuiltinOperator : byte { UNIDIRECTIONAL_SEQUENCE_LSTM = 44, STRIDED_SLICE = 45, BIDIRECTIONAL_SEQUENCE_RNN = 46, + EXP = 47, } // Options for the builtin operators. @@ -156,6 +157,7 @@ union BuiltinOptions { SqueezeOptions, SequenceRNNOptions, StridedSliceOptions, + ExpOptions, } enum Padding : byte { SAME, VALID } @@ -332,6 +334,9 @@ table GatherOptions { table TransposeOptions { } +table ExpOptions { +} + table MeanOptions { keep_dims: bool; } diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index e2ac0b9d1e..40b50bab45 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -117,6 +117,9 @@ struct GatherOptionsT; struct TransposeOptions; struct TransposeOptionsT; +struct ExpOptions; +struct ExpOptionsT; + struct MeanOptions; struct MeanOptionsT; @@ -215,11 +218,12 @@ enum BuiltinOperator { BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM = 44, BuiltinOperator_STRIDED_SLICE = 45, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46, + BuiltinOperator_EXP = 47, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN + BuiltinOperator_MAX = BuiltinOperator_EXP }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[44] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[45] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -264,7 +268,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[44] { BuiltinOperator_SQUEEZE, BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, BuiltinOperator_STRIDED_SLICE, - BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN}; + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOperator_EXP}; return values; } @@ -316,6 +321,7 @@ inline const char **EnumNamesBuiltinOperator() { "UNIDIRECTIONAL_SEQUENCE_LSTM", "STRIDED_SLICE", "BIDIRECTIONAL_SEQUENCE_RNN", + "EXP", nullptr}; return names; } @@ -359,11 +365,12 @@ enum BuiltinOptions { BuiltinOptions_SqueezeOptions = 30, BuiltinOptions_SequenceRNNOptions = 31, BuiltinOptions_StridedSliceOptions = 32, + BuiltinOptions_ExpOptions = 33, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_StridedSliceOptions + BuiltinOptions_MAX = BuiltinOptions_ExpOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[33] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[34] { static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -397,7 +404,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[33] { BuiltinOptions_DivOptions, BuiltinOptions_SqueezeOptions, BuiltinOptions_SequenceRNNOptions, - BuiltinOptions_StridedSliceOptions}; + BuiltinOptions_StridedSliceOptions, + BuiltinOptions_ExpOptions}; return values; } @@ -435,6 +443,7 @@ inline const char **EnumNamesBuiltinOptions() { "SqueezeOptions", "SequenceRNNOptions", "StridedSliceOptions", + "ExpOptions", nullptr}; return names; } @@ -613,6 +622,11 @@ struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions; }; +template <> +struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -980,6 +994,16 @@ struct BuiltinOptionsUnion { ? reinterpret_cast(value) : nullptr; } + ExpOptionsT *AsExpOptions() { + return type == BuiltinOptions_ExpOptions + ? reinterpret_cast(value) + : nullptr; + } + const ExpOptionsT *AsExpOptions() const { + return type == BuiltinOptions_ExpOptions + ? reinterpret_cast(value) + : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, @@ -2627,16 +2651,13 @@ flatbuffers::Offset CreateLSTMOptions( struct ResizeBilinearOptionsT : public flatbuffers::NativeTable { typedef ResizeBilinearOptions TableType; bool align_corners; - ResizeBilinearOptionsT() - : align_corners(false) { - } + ResizeBilinearOptionsT() : align_corners(false) {} }; -struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { +struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS + : private flatbuffers::Table { typedef ResizeBilinearOptionsT NativeTableType; - enum { - VT_ALIGN_CORNERS = 8 - }; + enum { VT_ALIGN_CORNERS = 8 }; bool align_corners() const { return GetField(VT_ALIGN_CORNERS, 0) != 0; } @@ -2645,16 +2666,22 @@ struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Tabl VerifyField(verifier, VT_ALIGN_CORNERS) && verifier.EndTable(); } - ResizeBilinearOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + ResizeBilinearOptionsT *UnPack( + const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo( + ResizeBilinearOptionsT *_o, + const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack( + flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, + const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct ResizeBilinearOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_align_corners(bool align_corners) { - fbb_.AddElement(ResizeBilinearOptions::VT_ALIGN_CORNERS, static_cast(align_corners), 0); + fbb_.AddElement(ResizeBilinearOptions::VT_ALIGN_CORNERS, + static_cast(align_corners), 0); } explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { @@ -2669,14 +2696,15 @@ struct ResizeBilinearOptionsBuilder { }; inline flatbuffers::Offset CreateResizeBilinearOptions( - flatbuffers::FlatBufferBuilder &_fbb, - bool align_corners = false) { + flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false) { ResizeBilinearOptionsBuilder builder_(_fbb); builder_.add_align_corners(align_corners); return builder_.Finish(); } -flatbuffers::Offset CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateResizeBilinearOptions( + flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, + const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct CallOptionsT : public flatbuffers::NativeTable { typedef CallOptions TableType; @@ -3345,6 +3373,51 @@ flatbuffers::Offset CreateTransposeOptions( flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct ExpOptionsT : public flatbuffers::NativeTable { + typedef ExpOptions TableType; + ExpOptionsT() {} +}; + +struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef ExpOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && verifier.EndTable(); + } + ExpOptionsT *UnPack( + const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo( + ExpOptionsT *_o, + const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack( + flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, + const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct ExpOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ExpOptionsBuilder &operator=(const ExpOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateExpOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + ExpOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateExpOptions( + flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, + const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct MeanOptionsT : public flatbuffers::NativeTable { typedef MeanOptions TableType; bool keep_dims; @@ -3858,6 +3931,11 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { ? static_cast(builtin_options()) : nullptr; } + const ExpOptions *builtin_options_as_ExpOptions() const { + return builtin_options_type() == BuiltinOptions_ExpOptions + ? static_cast(builtin_options()) + : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -4071,6 +4149,11 @@ Operator::builtin_options_as() const { return builtin_options_as_StridedSliceOptions(); } +template <> +inline const ExpOptions *Operator::builtin_options_as() const { + return builtin_options_as_ExpOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -5449,6 +5532,10 @@ inline void ResizeBilinearOptions::UnPackTo( const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; + { + auto _e = align_corners(); + _o->align_corners = _e; + }; } inline flatbuffers::Offset ResizeBilinearOptions::Pack( @@ -5468,7 +5555,8 @@ inline flatbuffers::Offset CreateResizeBilinearOptions( const flatbuffers::rehasher_function_t *__rehasher; } _va = {&_fbb, _o, _rehasher}; (void)_va; - return tflite::CreateResizeBilinearOptions(_fbb); + auto _align_corners = _o->align_corners; + return tflite::CreateResizeBilinearOptions(_fbb, _align_corners); } inline CallOptionsT *CallOptions::UnPack( @@ -5935,6 +6023,39 @@ inline flatbuffers::Offset CreateTransposeOptions( return tflite::CreateTransposeOptions(_fbb); } +inline ExpOptionsT *ExpOptions::UnPack( + const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new ExpOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void ExpOptions::UnPackTo( + ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset ExpOptions::Pack( + flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, + const flatbuffers::rehasher_function_t *_rehasher) { + return CreateExpOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateExpOptions( + flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, + const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { + flatbuffers::FlatBufferBuilder *__fbb; + const ExpOptionsT *__o; + const flatbuffers::rehasher_function_t *__rehasher; + } _va = {&_fbb, _o, _rehasher}; + (void)_va; + return tflite::CreateExpOptions(_fbb); +} + inline MeanOptionsT *MeanOptions::UnPack( const flatbuffers::resolver_function_t *_resolver) const { auto _o = new MeanOptionsT(); @@ -6595,6 +6716,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_ExpOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } @@ -6604,6 +6729,7 @@ inline bool VerifyBuiltinOptionsVector( flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { + if (!values || !types) return !values && !types; if (values->size() != types->size()) return false; for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { if (!VerifyBuiltinOptions(verifier, values->Get(i), @@ -6747,6 +6873,10 @@ inline void *BuiltinOptionsUnion::UnPack( auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_ExpOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } @@ -6886,6 +7016,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack( auto ptr = reinterpret_cast(value); return CreateStridedSliceOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_ExpOptions: { + auto ptr = reinterpret_cast(value); + return CreateExpOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } @@ -7041,6 +7175,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) *reinterpret_cast(u.value)); break; } + case BuiltinOptions_ExpOptions: { + value = new ExpOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -7208,6 +7346,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_ExpOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 9351cf9d64..8739ffb34c 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -25,6 +25,7 @@ gen_zipped_test_files( "conv.zip", "depthwiseconv.zip", "div.zip", + "exp.zip", "fully_connected.zip", "fused_batch_norm.zip", "gather.zip", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index a86c64849f..7fe46165c7 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -745,6 +745,33 @@ def make_mean_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +def make_exp_tests(zip_path): + """Make a set of tests to do exp.""" + + test_parameters = [{ + "input_dtype": [tf.float32], + "input_shape": [[3], [1, 100], [4, 2, 3], [5, 224, 224, 3]], + }] + + def build_graph(parameters): + """Build the exp op testing graph.""" + input_tensor = tf.placeholder( + dtype=parameters["input_dtype"], + name="input", + shape=parameters["input_shape"]) + + out = tf.exp(input_tensor) + return [input_tensor], [out] + + def build_inputs(parameters, sess, inputs, outputs): + values = [ + create_tensor_data(parameters["input_dtype"], parameters["input_shape"]) + ] + return values, sess.run(outputs, feed_dict=dict(zip(inputs, values))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + def make_binary_op_tests_func(binary_operator): """Return a function that does a test on a binary operator.""" return lambda zip_path: make_binary_op_tests(zip_path, binary_operator) @@ -1715,6 +1742,7 @@ def main(unused_args): "mean.zip": make_mean_tests, "squeeze.zip": make_squeeze_tests, "strided_slice.zip": make_strided_slice_tests, + "exp.zip": make_exp_tests, } out = FLAGS.zip_to_output bin_path = FLAGS.toco diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 5ea3e21f6a..80e806ab03 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -242,6 +242,7 @@ INSTANTIATE_TESTS(constant) INSTANTIATE_TESTS(control_dep) INSTANTIATE_TESTS(conv) INSTANTIATE_TESTS(depthwiseconv) +INSTANTIATE_TESTS(exp) INSTANTIATE_TESTS(fully_connected) INSTANTIATE_TESTS(fused_batch_norm) INSTANTIATE_TESTS(gather) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 7cddbadac2..ddcc03813f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -1327,6 +1327,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { case OperatorType::kTensorFlowAssert: case OperatorType::kCast: case OperatorType::kFloor: + case OperatorType::kExp: ProcessSimpleOperator(model, op); break; case OperatorType::kGather: diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 41d6c832f0..02c3b2ed9f 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1460,6 +1460,17 @@ void ConvertBatchToSpaceNDOperator(const NodeDef& node, model->operators.emplace_back(op); } +void ConvertExpOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { + CHECK_EQ(node.op(), "Exp"); + CheckInputsCount(node, tf_import_flags, 1); + auto* op = new ExpOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); +} + void ConvertMeanOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -1986,6 +1997,8 @@ std::unique_ptr ImportTensorFlowGraphDef( ConvertTransposeOperator(node, tf_import_flags, model); } else if (node.op() == "ArgMax") { ConvertArgMaxOperator(node, tf_import_flags, model); + } else if (node.op() == "Exp") { + ConvertExpOperator(node, tf_import_flags, model); } else { ConvertUnsupportedOperator(node, tf_import_flags, model); } diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 0bee694387..4c44f3fd66 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -44,6 +44,7 @@ enum class OperatorType { kSpaceToDepth, kDequantize, kDiv, + kExp, kExpandDims, kFill, kFloorDiv, @@ -852,6 +853,17 @@ struct TransposeConvOperator : Operator { int stride_height = 0; }; +// Given a tensor input, this operation calculates element-wise exponential +// (y = e^x). +// +// Inputs: +// inputs[0]: required: input tensor +// +// TensorFlow equivalent: Exp +struct ExpOperator : Operator { + ExpOperator() : Operator(OperatorType::kExp) {} +}; + // Given a tensor input, this operation inserts a dimension of 1 at the // dimension index axis of input's shape. The dimension index axis starts at // zero; if you specify a negative number for axis it is counted backward from diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index ff54b350bf..2583ec0e34 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -835,6 +835,7 @@ std::vector> BuildOperatorList() { "LOGISTIC", OperatorType::kLogistic)); ops.emplace_back( new SimpleOperator("TANH", OperatorType::kTanh)); + ops.emplace_back(new SimpleOperator("EXP", OperatorType::kExp)); return ops; } diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc index 796534be53..05c325ef91 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc @@ -106,6 +106,7 @@ TEST_F(OperatorTest, SimpleOperators) { CheckSimpleOperator("RELU6", OperatorType::kRelu6); CheckSimpleOperator("LOGISTIC", OperatorType::kLogistic); CheckSimpleOperator("TANH", OperatorType::kTanh); + CheckSimpleOperator("EXP", OperatorType::kExp); } TEST_F(OperatorTest, BuiltinAdd) { diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index a5fed23158..627541595b 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -313,6 +313,7 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(Svdf) HANDLE_OPERATORTYPENAME_CASE(ArgMax) HANDLE_OPERATORTYPENAME_CASE(TensorFlowUnsupported) + HANDLE_OPERATORTYPENAME_CASE(Exp) default: LOG(FATAL) << "Unhandled op type"; #undef HANDLE_OPERATORTYPENAME_CASE -- GitLab From aa92995493939b674cd54b13b5850cc185a6e7ae Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 14 Feb 2018 14:49:23 -0800 Subject: [PATCH 192/629] [TF:XLA] Add a hook to allow reshaping of TensorFlow variables when storing them in their XLA representation. PiperOrigin-RevId: 185748660 --- tensorflow/compiler/tf2xla/BUILD | 1 + tensorflow/compiler/tf2xla/literal_util.cc | 24 +- tensorflow/compiler/tf2xla/literal_util.h | 9 +- tensorflow/compiler/tf2xla/xla_compiler.cc | 243 +++++++++--------- tensorflow/compiler/tf2xla/xla_compiler.h | 22 +- .../compiler/tf2xla/xla_compiler_test.cc | 124 +++++++++ tensorflow/compiler/tf2xla/xla_context.cc | 16 +- tensorflow/compiler/tf2xla/xla_context.h | 14 +- tensorflow/compiler/tf2xla/xla_op_kernel.cc | 22 +- tensorflow/compiler/tf2xla/xla_op_kernel.h | 2 +- 10 files changed, 341 insertions(+), 136 deletions(-) diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 3c7dfef03d..fb82c2601c 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -312,6 +312,7 @@ tf_cc_test( "//tensorflow/cc:cc_ops", "//tensorflow/cc:function_ops", "//tensorflow/cc:ops", + "//tensorflow/cc:resource_variable_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", diff --git a/tensorflow/compiler/tf2xla/literal_util.cc b/tensorflow/compiler/tf2xla/literal_util.cc index fcbd157c61..2c3cd658e0 100644 --- a/tensorflow/compiler/tf2xla/literal_util.cc +++ b/tensorflow/compiler/tf2xla/literal_util.cc @@ -40,20 +40,20 @@ Status HostTensorToLiteral(const Tensor& host_tensor, xla::Literal* literal) { return Status::OK(); } -Status LiteralToHostTensor(const xla::Literal& literal, DataType target_type, - Tensor* host_tensor) { +Status CopyLiteralToHostTensor(const xla::Literal& literal, + Tensor* host_tensor) { + TF_RET_CHECK(xla::ShapeUtil::IsArray(literal.shape()) && + xla::ShapeUtil::ElementsIn(literal.shape()) == + host_tensor->NumElements()); xla::PrimitiveType primitive_type; - TF_RETURN_IF_ERROR(DataTypeToPrimitiveType(target_type, &primitive_type)); + TF_RETURN_IF_ERROR( + DataTypeToPrimitiveType(host_tensor->dtype(), &primitive_type)); if (literal.shape().element_type() != primitive_type) { return errors::InvalidArgument( "Cannot convert literal of type ", xla::PrimitiveType_Name(literal.shape().element_type()), - " to tensor of type ", DataTypeString(target_type)); + " to tensor of type ", DataTypeString(host_tensor->dtype())); } - - TensorShape shape; - TF_RETURN_IF_ERROR(XLAShapeToTensorShape(literal.shape(), &shape)); - *host_tensor = Tensor(target_type, shape); size_t total_bytes = host_tensor->TotalBytes(); if (total_bytes > 0) { const void* src_ptr = literal.untyped_data(); @@ -63,4 +63,12 @@ Status LiteralToHostTensor(const xla::Literal& literal, DataType target_type, return Status::OK(); } +Status LiteralToHostTensor(const xla::Literal& literal, DataType target_type, + Tensor* host_tensor) { + TensorShape shape; + TF_RETURN_IF_ERROR(XLAShapeToTensorShape(literal.shape(), &shape)); + *host_tensor = Tensor(target_type, shape); + return CopyLiteralToHostTensor(literal, host_tensor); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/literal_util.h b/tensorflow/compiler/tf2xla/literal_util.h index fe08e83c23..f283b02368 100644 --- a/tensorflow/compiler/tf2xla/literal_util.h +++ b/tensorflow/compiler/tf2xla/literal_util.h @@ -29,7 +29,8 @@ namespace tensorflow { // unsupported type. Status HostTensorToLiteral(const Tensor& host_tensor, xla::Literal* literal); -// Copies 'literal' to 'host_tensor', which is allocated of type . +// Copies 'literal' to freshly allocated 'host_tensor', which is allocated of +// type . // Fails if the literal's primitive type != // DataTypeToPrimitiveType(target_type). Note that is not // derivable from the type of , because multiple tensorflow types map @@ -38,6 +39,12 @@ Status HostTensorToLiteral(const Tensor& host_tensor, xla::Literal* literal); Status LiteralToHostTensor(const xla::Literal& literal, DataType target_type, Tensor* host_tensor); +// Copies the contents of 'literal' to a previously allocated tensor +// 'host_tensor'. The tensor and the literal must have the same number of +// elements and the same type. +Status CopyLiteralToHostTensor(const xla::Literal& literal, + Tensor* host_tensor); + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_TF2XLA_LITERAL_UTIL_H_ diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 59e8830442..15bba46ac6 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -109,6 +109,12 @@ XlaCompiler::XlaCompiler(XlaCompiler::Options options) local_flib_runtime_ = local_pflr_->GetFLR(device_->name()); flib_runtime_ = pflr_->GetFLR(device_->name()); + + // The default variable representation shape is the identity function. + if (!options_.variable_representation_shape_fn) { + options_.variable_representation_shape_fn = + [](const TensorShape& shape, DataType type) { return shape; }; + } } XlaCompiler::~XlaCompiler() = default; @@ -223,8 +229,8 @@ Status XlaCompiler::CompileFunction(const XlaCompiler::CompileOptions& options, } // Computes the XLA shape for argument 'arg'. -/*static*/ Status XlaCompiler::XLAShapeForArgument( - const XlaCompiler::Argument& arg, xla::Shape* xla_shape) { +Status XlaCompiler::XLAShapeForArgument(const XlaCompiler::Argument& arg, + xla::Shape* xla_shape) { switch (arg.kind) { case XlaCompiler::Argument::kConstant: return TensorShapeToXLAShape(arg.type, arg.constant_value.shape(), @@ -235,8 +241,12 @@ Status XlaCompiler::CompileFunction(const XlaCompiler::CompileOptions& options, TF_RET_CHECK(arg.initialized); switch (arg.resource_kind) { - case XlaResource::kVariable: - return TensorShapeToXLAShape(arg.type, arg.shape, xla_shape); + case XlaResource::kVariable: { + TensorShape representation_shape = + options_.variable_representation_shape_fn(arg.shape, arg.type); + return TensorShapeToXLAShape(arg.type, representation_shape, + xla_shape); + } case XlaResource::kTensorArray: { if (arg.tensor_array_size < 0) { return errors::InvalidArgument( @@ -310,16 +320,116 @@ Status ExecuteGraph(XlaContext* xla_context, std::unique_ptr graph, return Status::OK(); } +// Builds the XLA computation. +// +// `retvals` is the list of retvals produced by _Retval operators, in index +// order. `variable_map` is a map from variable ID numbers to XlaOpContext +// variable states, generated by the symbolic evaluation. +// If `return_updated_values_for_all_resources` is true, all resources will be +// included in `resource_updates`, regardless of whether their value changed. +// Sets `*num_nonconst_outputs` to the number of outputs of the `computation`. +// Sets `*resource_updates` to a description of resources whose values are +// written by the computation; the variable writes are the last +// `resource_updates.size()` return values from the computation. Each entry in +// `resource_updates` is a (input_index, type) pair, where `input_index` is the +// index of a resource variable argument to the computation, and `type` is the +// type of the final output. +Status BuildComputation( + const std::vector& args, + const std::vector& arg_cores, + const std::vector& retvals, + const std::vector>& resources, + bool return_updated_values_for_all_resources, + xla::ComputationBuilder* builder, xla::Computation* computation, + int* num_computation_outputs, int* num_nonconst_outputs, + std::vector* resource_updates) { + std::vector elems; + elems.reserve(retvals.size()); + for (const XlaExpression& retval : retvals) { + if (!retval.has_constant_value()) { + elems.push_back(retval.handle()); + } + } + *num_nonconst_outputs = elems.size(); + + // Add return values for resources whose values have changed. + std::vector arg_resources; + arg_resources.reserve(resources.size()); + for (const auto& resource : resources) { + if (resource->arg_num() >= 0) { + arg_resources.push_back(resource.get()); + } + } + std::sort(arg_resources.begin(), arg_resources.end(), + [](const XlaResource* a, const XlaResource* b) { + return a->arg_num() < b->arg_num(); + }); + + for (const XlaResource* resource : arg_resources) { + const XlaCompiler::Argument& arg = args[resource->arg_num()]; + const int core = arg_cores[resource->arg_num()]; + DCHECK_LT(resource->arg_num(), arg_cores.size()); + bool modified = + resource->value().handle() != resource->initial_value().handle(); + // TensorArray gradients were modified if their values changed or there are + // any newly created gradients. + for (const auto& grad : resource->tensor_array_gradients()) { + modified = modified || + grad.second->value().handle() != + grad.second->initial_value().handle() || + arg.tensor_array_gradients.count(grad.first) == 0; + } + if (return_updated_values_for_all_resources || modified) { + resource_updates->emplace_back(); + XlaCompiler::ResourceUpdate& update = resource_updates->back(); + update.input_index = resource->arg_num(); + update.type = resource->type(); + update.shape = resource->shape(); + update.modified = modified; + for (const auto& grad : resource->tensor_array_gradients()) { + update.tensor_array_gradients_accessed.insert(grad.first); + } + + // Request that the value be returned on a specific core. + xla::ScopedShardingAssignment assign_sharding( + builder, core == -1 ? tensorflow::gtl::optional() + : xla::sharding_builder::AssignDevice(core)); + + xla::ComputationDataHandle handle; + TF_RETURN_IF_ERROR(resource->Pack(&handle, builder)); + + // Since we can't change the sharding metadata of as this point, + // create a tuple/get-tuple-element combination so that sharding + // assignment will be placed on this value, which will cause the resource + // update to be returned from the same device that provided the resource. + handle = builder->GetTupleElement(builder->Tuple({handle}), 0); + + elems.push_back(handle); + } + } + + *num_computation_outputs = elems.size(); + + // Builds the XLA computation. + builder->Tuple(elems); + xla::StatusOr computation_status = builder->Build(); + if (!computation_status.ok()) { + return computation_status.status(); + } + *computation = computation_status.ConsumeValueOrDie(); + return Status::OK(); +} + +} // namespace + // Builds XLA computations for each of the arguments to the computation. // `args` are the arguments to the computation. -Status BuildArguments(const Graph& graph, - const std::vector& args, - bool use_tuple_arg, xla::ComputationBuilder* builder, - XlaContext* context, std::vector* arg_cores, - std::vector* arg_expressions, - std::vector* input_mapping, - std::vector* input_shapes, - bool is_entry_computation) { +Status XlaCompiler::BuildArguments( + const Graph& graph, const std::vector& args, + bool use_tuple_arg, xla::ComputationBuilder* builder, XlaContext* context, + std::vector* arg_cores, std::vector* arg_expressions, + std::vector* input_mapping, std::vector* input_shapes, + bool is_entry_computation) { arg_expressions->resize(args.size()); *arg_cores = std::vector(args.size(), -1); @@ -374,8 +484,8 @@ Status BuildArguments(const Graph& graph, std::vector arg_shapes(input_mapping->size()); for (std::vector::size_type i = 0; i < input_mapping->size(); ++i) { // Computes the shapes of non-constant arguments. - TF_RETURN_IF_ERROR(XlaCompiler::XLAShapeForArgument( - args[(*input_mapping)[i]], &arg_shapes[i])); + TF_RETURN_IF_ERROR( + XLAShapeForArgument(args[(*input_mapping)[i]], &arg_shapes[i])); } if (use_tuple_arg) { @@ -472,108 +582,6 @@ Status BuildArguments(const Graph& graph, return Status::OK(); } -// Builds the XLA computation. -// -// `retvals` is the list of retvals produced by _Retval operators, in index -// order. `variable_map` is a map from variable ID numbers to XlaOpContext -// variable states, generated by the symbolic evaluation. -// If `return_updated_values_for_all_resources` is true, all resources will be -// included in `resource_updates`, regardless of whether their value changed. -// Sets `*num_nonconst_outputs` to the number of outputs of the `computation`. -// Sets `*resource_updates` to a description of resources whose values are -// written by the computation; the variable writes are the last -// `resource_updates.size()` return values from the computation. Each entry in -// `resource_updates` is a (input_index, type) pair, where `input_index` is the -// index of a resource variable argument to the computation, and `type` is the -// type of the final output. -Status BuildComputation( - const std::vector& args, - const std::vector& arg_cores, - const std::vector& retvals, - const std::vector>& resources, - bool return_updated_values_for_all_resources, - xla::ComputationBuilder* builder, xla::Computation* computation, - int* num_computation_outputs, int* num_nonconst_outputs, - std::vector* resource_updates) { - std::vector elems; - elems.reserve(retvals.size()); - for (const XlaExpression& retval : retvals) { - if (!retval.has_constant_value()) { - elems.push_back(retval.handle()); - } - } - *num_nonconst_outputs = elems.size(); - - // Add return values for resources whose values have changed. - std::vector arg_resources; - arg_resources.reserve(resources.size()); - for (const auto& resource : resources) { - if (resource->arg_num() >= 0) { - arg_resources.push_back(resource.get()); - } - } - std::sort(arg_resources.begin(), arg_resources.end(), - [](const XlaResource* a, const XlaResource* b) { - return a->arg_num() < b->arg_num(); - }); - - for (const XlaResource* resource : arg_resources) { - const XlaCompiler::Argument& arg = args[resource->arg_num()]; - const int core = arg_cores[resource->arg_num()]; - DCHECK_LT(resource->arg_num(), arg_cores.size()); - bool modified = - resource->value().handle() != resource->initial_value().handle(); - // TensorArray gradients were modified if their values changed or there are - // any newly created gradients. - for (const auto& grad : resource->tensor_array_gradients()) { - modified = modified || - grad.second->value().handle() != - grad.second->initial_value().handle() || - arg.tensor_array_gradients.count(grad.first) == 0; - } - if (return_updated_values_for_all_resources || modified) { - resource_updates->emplace_back(); - XlaCompiler::ResourceUpdate& update = resource_updates->back(); - update.input_index = resource->arg_num(); - update.type = resource->type(); - update.shape = resource->shape(); - update.modified = modified; - for (const auto& grad : resource->tensor_array_gradients()) { - update.tensor_array_gradients_accessed.insert(grad.first); - } - - // Request that the value be returned on a specific core. - xla::ScopedShardingAssignment assign_sharding( - builder, core == -1 ? tensorflow::gtl::optional() - : xla::sharding_builder::AssignDevice(core)); - - xla::ComputationDataHandle handle; - TF_RETURN_IF_ERROR(resource->Pack(&handle, builder)); - - // Since we can't change the sharding metadata of as this point, - // create a tuple/get-tuple-element combination so that sharding - // assignment will be placed on this value, which will cause the resource - // update to be returned from the same device that provided the resource. - handle = builder->GetTupleElement(builder->Tuple({handle}), 0); - - elems.push_back(handle); - } - } - - *num_computation_outputs = elems.size(); - - // Builds the XLA computation. - builder->Tuple(elems); - xla::StatusOr computation_status = builder->Build(); - if (!computation_status.ok()) { - return computation_status.status(); - } - *computation = computation_status.ConsumeValueOrDie(); - return Status::OK(); -} - -} // namespace - Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, string const& name, std::unique_ptr graph, @@ -598,7 +606,8 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, xla::ComputationBuilder builder(client(), name); XlaContext* context = new XlaContext(this, &builder, options_.allow_cpu_custom_calls, - options.resolve_compile_time_constants); + options.resolve_compile_time_constants, + &options_.variable_representation_shape_fn); core::ScopedUnref context_unref(context); std::vector arg_expressions; diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index b86c82c0ab..c4449bc4be 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -29,6 +29,9 @@ limitations under the License. #include "tensorflow/core/public/version.h" namespace tensorflow { + +class XlaContext; + // The XlaCompiler class is responsible for compilation of a self-contained // subgraph of a TensorFlow computation using the XLA linear algebra runtime. // It does a symbolic execution of the graph starting from specific input @@ -239,6 +242,12 @@ class XlaCompiler { // for CPU. bool allow_cpu_custom_calls = false; + // If set, the XLA representation of variables represented to XLA as the + // shape given by this shape function. Variables are reshaped to this shape + // on write, and reshaped to their original shape on read. + std::function + variable_representation_shape_fn; + // If not nullptr, populate_resource_manager is called with the // compilation device's resource manager when the compilation // device is created, and can be used to create metadata objects @@ -278,7 +287,7 @@ class XlaCompiler { // Returns the shape of the XLA parameter for an argument 'arg'. // See the class comment for more details about the argument passing // convention. - static Status XLAShapeForArgument(const Argument& arg, xla::Shape* xla_shape); + Status XLAShapeForArgument(const Argument& arg, xla::Shape* xla_shape); // Retrieves the channel handle associated with `key`. Allocates // a new channel handle if none exists. @@ -299,6 +308,17 @@ class XlaCompiler { // Returns the optimized graph object in this function body. std::unique_ptr GetGraph(const FunctionBody* fbody); + // Builds XLA computations for each of the arguments to the computation. + // `args` are the arguments to the computation. + Status BuildArguments(const Graph& graph, + const std::vector& args, + bool use_tuple_arg, xla::ComputationBuilder* builder, + XlaContext* context, std::vector* arg_cores, + std::vector* arg_expressions, + std::vector* input_mapping, + std::vector* input_shapes, + bool is_entry_computation); + // Graph compiler needs to know how to get an optimized graph from a function // body. friend class GraphCompiler; diff --git a/tensorflow/compiler/tf2xla/xla_compiler_test.cc b/tensorflow/compiler/tf2xla/xla_compiler_test.cc index 65de4dbad7..a18eeacd41 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler_test.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/ops/data_flow_ops.h" #include "tensorflow/cc/ops/function_ops.h" +#include "tensorflow/cc/ops/resource_variable_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" @@ -683,5 +684,128 @@ TEST_F(XlaCompilerTest, LocalFunctionWithWrongArgumentsFail) { << status.error_message(); } +// Tests a simple graph that reads and writes a variable. +TEST_F(XlaCompilerTest, Variables) { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0); + auto var = ops::_Arg(scope.WithOpName("V"), DT_RESOURCE, 1); + auto write = ops::AssignAddVariableOp(scope, var, a); + auto read = ops::ReadVariableOp( + scope.WithControlDependencies(std::vector{write}), var, + DT_INT32); + auto read_plus_one = ops::Add(scope, read, ops::Const(scope, 1)); + auto d = ops::_Retval(scope.WithOpName("D"), read_plus_one, 0); + std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + + // Builds a description of the arguments. + std::vector args(2); + args[0].kind = XlaCompiler::Argument::kParameter; + args[0].type = DT_INT32; + args[0].shape = TensorShape({2}); + args[1].kind = XlaCompiler::Argument::kResource; + args[1].resource_kind = XlaResource::kVariable; + args[1].initialized = true; + args[1].type = DT_INT32; + args[1].shape = TensorShape({2}); + + // Compiles the graph. + XlaCompiler compiler(DefaultOptions()); + + XlaCompiler::CompilationResult result; + TF_ASSERT_OK(compiler.CompileGraph(XlaCompiler::CompileOptions(), "add", + std::move(graph), args, &result)); + + // Tests that the generated computation works. + std::unique_ptr param0_literal = + xla::Literal::CreateR1({7, 42}); + std::unique_ptr param1_literal = + xla::Literal::CreateR1({-3, 101}); + std::unique_ptr param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + std::unique_ptr param1_data = + client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); + + std::unique_ptr actual = + client_ + ->Execute(*result.computation, {param0_data.get(), param1_data.get()}) + .ConsumeValueOrDie(); + std::unique_ptr actual_literal = + client_->Transfer(*actual).ConsumeValueOrDie(); + + std::unique_ptr expected0 = + xla::Literal::CreateR1({5, 144}); + std::unique_ptr expected1 = + xla::Literal::CreateR1({4, 143}); + std::unique_ptr expected_literal = + xla::Literal::MakeTuple({expected0.get(), expected1.get()}); + xla::LiteralTestUtil::ExpectEqual(*expected_literal, *actual_literal); +} + +// Tests a simple graph that reads and writes a variable, with a +// variable_representation_shape_fn passed to the compiler that flattens all +// variable tensors to vectors. +TEST_F(XlaCompilerTest, VariableRepresentationShapeFunction) { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0); + auto var = ops::_Arg(scope.WithOpName("V"), DT_RESOURCE, 1); + auto write = ops::AssignAddVariableOp(scope, var, a); + auto read = ops::ReadVariableOp( + scope.WithControlDependencies(std::vector{write}), var, + DT_INT32); + auto read_plus_one = ops::Add(scope, read, ops::Const(scope, 1)); + auto d = ops::_Retval(scope.WithOpName("D"), read_plus_one, 0); + std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + + // Builds a description of the arguments. + std::vector args(2); + args[0].kind = XlaCompiler::Argument::kParameter; + args[0].type = DT_INT32; + args[0].shape = TensorShape({2, 2}); + args[1].kind = XlaCompiler::Argument::kResource; + args[1].resource_kind = XlaResource::kVariable; + args[1].initialized = true; + args[1].type = DT_INT32; + args[1].shape = TensorShape({2, 2}); + + // Compiles the graph. + XlaCompiler::Options options = DefaultOptions(); + options.variable_representation_shape_fn = [](const TensorShape& shape, + DataType type) { + return TensorShape({shape.num_elements()}); + }; + XlaCompiler compiler(options); + + XlaCompiler::CompilationResult result; + TF_ASSERT_OK(compiler.CompileGraph(XlaCompiler::CompileOptions(), "add", + std::move(graph), args, &result)); + + // Tests that the generated computation works. + std::unique_ptr param0_literal = + xla::Literal::CreateR2({{4, 55}, {1, -3}}); + std::unique_ptr param1_literal = + xla::Literal::CreateR1({22, 11, 33, 404}); + std::unique_ptr param0_data = + client_->TransferToServer(*param0_literal).ConsumeValueOrDie(); + std::unique_ptr param1_data = + client_->TransferToServer(*param1_literal).ConsumeValueOrDie(); + + std::unique_ptr actual = + client_ + ->Execute(*result.computation, {param0_data.get(), param1_data.get()}) + .ConsumeValueOrDie(); + std::unique_ptr actual_literal = + client_->Transfer(*actual).ConsumeValueOrDie(); + + std::unique_ptr expected0 = + xla::Literal::CreateR2({{27, 67}, {35, 402}}); + std::unique_ptr expected1 = + xla::Literal::CreateR1({26, 66, 34, 401}); + std::unique_ptr expected_literal = + xla::Literal::MakeTuple({expected0.get(), expected1.get()}); + xla::LiteralTestUtil::ExpectEqual(*expected_literal, *actual_literal); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc index 73878955e3..8423921086 100644 --- a/tensorflow/compiler/tf2xla/xla_context.cc +++ b/tensorflow/compiler/tf2xla/xla_context.cc @@ -62,13 +62,16 @@ void XlaContext::set_args(std::vector args) { args_ = std::move(args); } -XlaContext::XlaContext(XlaCompiler* compiler, xla::ComputationBuilder* builder, - bool allow_cpu_custom_calls, - bool resolve_compile_time_constants) +XlaContext::XlaContext( + XlaCompiler* compiler, xla::ComputationBuilder* builder, + bool allow_cpu_custom_calls, bool resolve_compile_time_constants, + const std::function* + variable_representation_shape_fn) : compiler_(compiler), builder_(builder), allow_cpu_custom_calls_(allow_cpu_custom_calls), - resolve_compile_time_constants_(resolve_compile_time_constants) {} + resolve_compile_time_constants_(resolve_compile_time_constants), + variable_representation_shape_fn_(variable_representation_shape_fn) {} string XlaContext::DebugString() { return "TLA JIT context"; } @@ -115,6 +118,11 @@ Status XlaContext::CreateResource( return Status::OK(); } +TensorShape XlaContext::VariableRepresentationShape(const TensorShape& shape, + DataType type) const { + return (*variable_representation_shape_fn_)(shape, type); +} + const xla::Computation* XlaContext::GetOrCreateMax(const DataType type) { return LookupOrCreate(type, &max_func_, [this, type] { const string type_string = DataTypeString(type); diff --git a/tensorflow/compiler/tf2xla/xla_context.h b/tensorflow/compiler/tf2xla/xla_context.h index fac0352ae8..00fbaba37c 100644 --- a/tensorflow/compiler/tf2xla/xla_context.h +++ b/tensorflow/compiler/tf2xla/xla_context.h @@ -44,7 +44,9 @@ class XlaContext : public ResourceBase { // Creates a new XlaContext. XlaContext(XlaCompiler* compiler, xla::ComputationBuilder* builder, - bool allow_cpu_custom_calls, bool resolve_compile_time_constants); + bool allow_cpu_custom_calls, bool resolve_compile_time_constants, + const std::function* + variable_representation_shape_fn); // Virtual method defined by ResourceBase. string DebugString() override; @@ -86,6 +88,11 @@ class XlaContext : public ResourceBase { return resources_; } + // Returns the XLA shape to be used to represent a variable of TF `shape` + // and `type`. + TensorShape VariableRepresentationShape(const TensorShape& shape, + DataType type) const; + // Get an XLA lambda to compute Max. This is cached in the // XlaContext since it may be used by multiple Ops. There is a // separate specialization of the computation for each DataType. @@ -133,6 +140,11 @@ class XlaContext : public ResourceBase { // Holds ownership of resources. The resources are not ordered. std::vector> resources_; + // A function that describes how variable shapes should be represented + // in XLA. Variable values will be reshaped to this shape. Must be non-null. + const std::function* + variable_representation_shape_fn_; + // Cache of prebuilt computations indexed by their type. using ComputationMap = std::map; diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index ee29158646..c4bb90d587 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -302,10 +302,19 @@ Status XlaOpKernelContext::ReadVariableInput( "Type mismatch for read of variable ", variable->name(), ". Expected ", DataTypeString(type), "; got ", DataTypeString(variable->type())); } - *value = variable->value(); if (shape) { *shape = variable->shape(); } + + XlaContext& xla_context = XlaContext::Get(context_); + TensorShape representation_shape = xla_context.VariableRepresentationShape( + variable->shape(), variable->type()); + if (representation_shape == variable->shape()) { + *value = variable->value(); + } else { + *value = + builder()->Reshape(variable->value(), variable->shape().dim_sizes()); + } return Status::OK(); } @@ -400,8 +409,8 @@ Status XlaOpKernelContext::GetResourceInput(int index, XlaResource** resource) { return Status::OK(); } -Status XlaOpKernelContext::AssignVariable( - int input_index, DataType type, const xla::ComputationDataHandle& handle) { +Status XlaOpKernelContext::AssignVariable(int input_index, DataType type, + xla::ComputationDataHandle handle) { TF_RET_CHECK(handle.handle() != 0); const XlaExpression* expression = @@ -419,6 +428,13 @@ Status XlaOpKernelContext::AssignVariable( XLAShapeToTensorShape(*shape_or_status.ValueOrDie(), &shape)); TF_RETURN_IF_ERROR(variable->SetTypeAndShape(type, shape)); + + XlaContext& xla_context = XlaContext::Get(context_); + TensorShape representation_shape = + xla_context.VariableRepresentationShape(shape, type); + if (shape != representation_shape) { + handle = builder()->Reshape(handle, representation_shape.dim_sizes()); + } return variable->SetValue(handle); } diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h index e1fd0f55c6..4e4b97e0ce 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.h +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h @@ -175,7 +175,7 @@ class XlaOpKernelContext { // variable has been initialized with a different type or with a // different shape. Status AssignVariable(int input_index, DataType type, - const xla::ComputationDataHandle& handle); + xla::ComputationDataHandle handle); // Helper routines for the OP_REQUIRES macros void CtxFailure(const Status& s); -- GitLab From 15d33641cd6bc1801ede791eccd39a9678a0d64c Mon Sep 17 00:00:00 2001 From: Tatiana Shpeisman Date: Wed, 14 Feb 2018 15:19:11 -0800 Subject: [PATCH 193/629] Build error fix - make allocator_ to be VisitableAllocator* instead of Allocator*. Allocator does not have AddAllocVisitor() and AddFreeVisitor() methods. PiperOrigin-RevId: 185753346 --- tensorflow/core/common_runtime/mkl_cpu_allocator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 0eb47f4e56..2a67c039ac 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -25,7 +25,7 @@ limitations under the License. #include #include #include "tensorflow/core/common_runtime/bfc_allocator.h" -#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/common_runtime/visitable_allocator.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/mem.h" @@ -161,7 +161,7 @@ class MklCPUAllocator : public VisitableAllocator { /// The alignment that we need for the allocations static const size_t kAlignment = 64; - Allocator* allocator_; // owned by this class + VisitableAllocator* allocator_; // owned by this class }; } // namespace tensorflow -- GitLab From 5b984d1bd80768a954063f9bb220373ff459bbfd Mon Sep 17 00:00:00 2001 From: Chris Leary Date: Wed, 14 Feb 2018 15:37:15 -0800 Subject: [PATCH 194/629] [XLA:python] Add ability to set result layouts via Python API. PiperOrigin-RevId: 185755948 --- tensorflow/compiler/xla/client/computation.cc | 10 +++++++ tensorflow/compiler/xla/client/computation.h | 4 +++ .../xla/python/local_computation_builder.cc | 6 +++++ .../xla/python/local_computation_builder.h | 5 ++++ .../xla/python/local_computation_builder.i | 16 +++++++++++ tensorflow/compiler/xla/python/xla_client.py | 27 +++++++++++++++++++ 6 files changed, 68 insertions(+) diff --git a/tensorflow/compiler/xla/client/computation.cc b/tensorflow/compiler/xla/client/computation.cc index 4baea8df6e..e6c57bda0f 100644 --- a/tensorflow/compiler/xla/client/computation.cc +++ b/tensorflow/compiler/xla/client/computation.cc @@ -64,4 +64,14 @@ void Computation::ResetWithoutFreeing() { parent_ = nullptr; } +StatusOr Computation::GetProgramShape() const { + GetComputationShapeRequest request; + *request.mutable_computation() = handle_; + GetComputationShapeResponse response; + + TF_RETURN_IF_ERROR(parent_->GetComputationShape(&request, &response)); + + return std::move(*response.mutable_program_shape()); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/client/computation.h b/tensorflow/compiler/xla/client/computation.h index b595172486..a53fc9e9cf 100644 --- a/tensorflow/compiler/xla/client/computation.h +++ b/tensorflow/compiler/xla/client/computation.h @@ -60,6 +60,10 @@ class Computation { // Returns true if this object is a null Computation. bool IsNull() const { return parent_ == nullptr; } + // Returns the "program shape" (parameter and return shapes) for this + // computation. + StatusOr GetProgramShape() const; + private: void ResetWithoutFreeing(); diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index a89146d448..cb7bb21e09 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -278,6 +278,12 @@ const Computation& LocalComputation::computation() const { return computation_; } +StatusOr LocalComputation::GetReturnValueShape() const { + TF_ASSIGN_OR_RETURN(ProgramShape program_shape, + computation_.GetProgramShape()); + return std::move(*program_shape.mutable_result()); +} + LocalComputationBuilder::LocalComputationBuilder(const string& computation_name) : builder_(GetOrCreateLocalClient(), computation_name) {} diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h index d682204d26..d3e9503ea1 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.h +++ b/tensorflow/compiler/xla/python/local_computation_builder.h @@ -102,11 +102,16 @@ class CompiledLocalComputation { class LocalComputation { public: LocalComputation(Computation computation); + StatusOr Compile( const std::vector& argument_shapes, const ExecutableBuildOptions* build_options); + const Computation& computation() const; + // Returns the return-value shape for this computation. + StatusOr GetReturnValueShape() const; + private: Computation computation_; }; diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index fa6c8bfa29..456e341f87 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -832,6 +832,21 @@ tensorflow::ImportNumpy(); } Py_DECREF(o); + o = PyObject_GetAttrString($input, "result_shape"); + if (o == nullptr) { + return nullptr; + } + if (o != Py_None) { + StatusOr statusor = numpy::XlaShapeFromPyShape(o); + if (!statusor.ok()) { + PyErr_SetString(PyExc_TypeError, tensorflow::strings::StrCat("ExecutableBuildOptions.result_shape could not be created from Python shape value: ", statusor.status().ToString()).c_str()); + Py_DECREF(o); + return NULL; + } + build_options.set_result_layout(statusor.ValueOrDie()); + } + Py_DECREF(o); + $1 = &build_options; } } @@ -852,6 +867,7 @@ tensorflow::ImportNumpy(); %unignore xla::swig::CompiledLocalComputation::ExecuteWithShapedBuffers; %unignore xla::swig::LocalComputation; %unignore xla::swig::LocalComputation::Compile; +%unignore xla::swig::LocalComputation::GetReturnValueShape; %unignore xla::swig::LocalComputationBuilder; %unignore xla::swig::LocalComputationBuilder::LocalComputationBuilder; %unignore xla::swig::LocalComputationBuilder::Build; diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 2489ad9509..bcff9508fb 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -360,17 +360,44 @@ class LocalComputation(object): # Ensure a reference to C-based destructor for use in __del__. if is_compiled: + assert isinstance(c_local_computation, c_api.CompiledLocalComputation) self._delete = c_api.DeleteCompiledLocalComputation else: + assert isinstance(c_local_computation, c_api.LocalComputation) self._delete = c_api.DeleteLocalComputation def Compile(self, argument_shapes=(), compile_options=None, layout_fn=None): + """Compiles an un-compiled local computation. + + Local computations are the result of a "LocalComputationBuild'ing" process + -- they start in uncompiled form, and via a call to Compile() turn into a + compiled local computation. + + Raises: + ValueError: if this is already a compiled local computation. + + Arguments: + argument_shapes: parameter shapes -- they are first laid out by layout_fn + if layout_fn is provided. Otherwise, the default layout for those shapes + will be used. + compile_options: options to use for compilation, includes an optional + laid out result shape for the computation. + layout_fn: lambda that is used to lay out the argument/result shapes. + + Returns: + A newly *compiled* local computation instance. + """ if self.is_compiled: raise ValueError('Attempt to compile a compiled local XLA computation.') + if layout_fn: argument_shapes = [ shape.map_leaves(layout_fn) for shape in argument_shapes ] + result_shape = _wrap_shape(self.c_local_computation.GetReturnValueShape()) + result_shape = result_shape.map_leaves(layout_fn) + compile_options = compile_options or CompileOptions() + compile_options.result_shape = result_shape return LocalComputation( self.c_local_computation.Compile(argument_shapes, compile_options), is_compiled=True) -- GitLab From c752b5f1d73214e880f9020c93a768e5d9109006 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 16:01:26 -0800 Subject: [PATCH 195/629] Add dedicated code for the print function instead of wrapping it generically to py_func. For now, we keep tf.Print disabled until we can find a way to test it. This might require launching the compiled code in a Python subprocess. PiperOrigin-RevId: 185759599 --- .../py2tf/converters/builtin_functions.py | 25 +++---- .../converters/builtin_functions_test.py | 69 +++++++++++++------ tensorflow/contrib/py2tf/impl/conversion.py | 3 - tensorflow/contrib/py2tf/utils/BUILD | 11 +++ tensorflow/contrib/py2tf/utils/__init__.py | 1 + tensorflow/contrib/py2tf/utils/printing.py | 47 +++++++++++++ .../contrib/py2tf/utils/printing_test.py | 53 ++++++++++++++ 7 files changed, 173 insertions(+), 36 deletions(-) create mode 100644 tensorflow/contrib/py2tf/utils/printing.py create mode 100644 tensorflow/contrib/py2tf/utils/printing_test.py diff --git a/tensorflow/contrib/py2tf/converters/builtin_functions.py b/tensorflow/contrib/py2tf/converters/builtin_functions.py index 310681dd01..2eb00f9057 100644 --- a/tensorflow/contrib/py2tf/converters/builtin_functions.py +++ b/tensorflow/contrib/py2tf/converters/builtin_functions.py @@ -25,36 +25,36 @@ from tensorflow.contrib.py2tf.pyct import transformer class BuiltinFunctionTransformer(transformer.Base): - """Handles builtin functions and canonicalizes old-style print statement. + """Handles builtin functions. This transformer only covers functions that are translated into a TF equivalent, like `len`. - Note that the `print` statement is converted to a function call here, but - wrapping the print function to a `py_func` is done by `call_trees` as a - generic uncompilable function wrap. """ - # TODO(mdan): Handle print entirely in here. - # Fully handling print here makes sense especially since we're considering - # using tf.Print instead. - def __init__(self, context): super(BuiltinFunctionTransformer, self).__init__(context) + # pylint:disable=invalid-name + def _convert_len(self, node): template = """ tf.shape(args)[0] """ - new_call = templates.replace(template, args=node.args)[0].value - return new_call + return templates.replace(template, args=node.args)[0].value - # pylint:disable=invalid-name + def _convert_print(self, node): + template = """ + py2tf_utils.call_print(args) + """ + return templates.replace(template, args=node.args)[0].value def visit_Call(self, node): self.generic_visit(node) # TODO(mdan): This won't work if the function was hidden. if isinstance(node.func, gast.Name) and node.func.id == 'len': return self._convert_len(node) + if isinstance(node.func, gast.Name) and node.func.id == 'print': + return self._convert_print(node) return node def visit_Print(self, node): @@ -66,7 +66,8 @@ class BuiltinFunctionTransformer(transformer.Base): template = """ fname(args) """ - return templates.replace(template, fname='print', args=args) + function_call = templates.replace(template, fname='print', args=args)[0] + return self.visit(function_call) # pylint:enable=invalid-name diff --git a/tensorflow/contrib/py2tf/converters/builtin_functions_test.py b/tensorflow/contrib/py2tf/converters/builtin_functions_test.py index 983d1ffc03..b279ff77ef 100644 --- a/tensorflow/contrib/py2tf/converters/builtin_functions_test.py +++ b/tensorflow/contrib/py2tf/converters/builtin_functions_test.py @@ -26,6 +26,8 @@ from tensorflow.contrib.py2tf.converters import builtin_functions from tensorflow.contrib.py2tf.converters import converter_test_base from tensorflow.python.framework import constant_op from tensorflow.python.ops import array_ops +from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import script_ops from tensorflow.python.platform import test @@ -45,7 +47,7 @@ class BuiltinFunctionsTest(converter_test_base.TestCase): sess.run( result.test_fn(constant_op.constant([0, 0, 0])))) - def test_print(self): + def test_print_with_op(self): def test_fn(a): print(a) @@ -53,16 +55,41 @@ class BuiltinFunctionsTest(converter_test_base.TestCase): node = self.parse_and_analyze(test_fn, {'print': print}) node = builtin_functions.transform(node, self.ctx) - with self.compiled(node) as result: - try: - out_capturer = six.StringIO() - sys.stdout = out_capturer - result.test_fn('a') - self.assertEqual(out_capturer.getvalue(), 'a\n') - finally: - sys.stdout = sys.__stdout__ + # Note: it's relevant not to include script_ops.py_func here, to verify + # that tf.Print is used. + with self.compiled(node, logging_ops.Print) as result: + with self.test_session() as sess: + try: + out_capturer = six.StringIO() + sys.stdout = out_capturer + result.test_fn('a') + sess.run(sess.graph.get_operations()) + self.assertEqual(out_capturer.getvalue(), 'a\n') + finally: + sys.stdout = sys.__stdout__ + + def test_print_with_op_multiple_values(self): + + def test_fn(a, b): + print(a, b) - def test_print_tuple(self): + node = self.parse_and_analyze(test_fn, {'print': print}) + node = builtin_functions.transform(node, self.ctx) + + # Note: it's relevant not to include script_ops.py_func here, to verify + # that tf.Print is used. + with self.compiled(node, logging_ops.Print) as result: + with self.test_session() as sess: + try: + out_capturer = six.StringIO() + sys.stdout = out_capturer + result.test_fn('a', 1) + sess.run(sess.graph.get_operations()) + self.assertEqual(out_capturer.getvalue(), 'a 1\n') + finally: + sys.stdout = sys.__stdout__ + + def test_print_with_py_func(self): def test_fn(a, b, c): print(a, b, c) @@ -70,18 +97,18 @@ class BuiltinFunctionsTest(converter_test_base.TestCase): node = self.parse_and_analyze(test_fn, {'print': print}) node = builtin_functions.transform(node, self.ctx) - with self.compiled(node) as result: - try: - out_capturer = six.StringIO() - sys.stdout = out_capturer - result.test_fn('a', 1, [2, 3]) - # It appears that the print output looks odd only under Python 2. - if six.PY2: - self.assertEqual(out_capturer.getvalue(), "('a', 1, [2, 3])\n") - else: + # Note: it's relevant not to include logging_ops.Print here, to verify + # that py_func is used. + with self.compiled(node, script_ops.py_func) as result: + with self.test_session() as sess: + try: + out_capturer = six.StringIO() + sys.stdout = out_capturer + result.test_fn('a', 1, [2, 3]) + sess.run(sess.graph.get_operations()) self.assertEqual(out_capturer.getvalue(), 'a 1 [2, 3]\n') - finally: - sys.stdout = sys.__stdout__ + finally: + sys.stdout = sys.__stdout__ if __name__ == '__main__': diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index ca13910ae5..3d5624b187 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -268,9 +268,6 @@ def node_to_graph(node, ctx, nocompile_decorators): node = for_loops.transform(node, ctx) # for_loops may insert new global references. node = builtin_functions.transform(node, ctx) - # TODO(mdan): Kept for CL consistency. Remove. - # builtin_functions may insert new global references. - ctx.namespace['print'] = print node = _static_analysis_pass(node, ctx) node = call_trees.transform(node, ctx, config.DEFAULT_UNCOMPILED_MODULES, diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index a679cb9076..c2fdd40707 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -23,6 +23,7 @@ py_library( "context_managers.py", "misc.py", "multiple_dispatch.py", + "printing.py", "py_func.py", "tensor_list.py", "type_check.py", @@ -75,6 +76,16 @@ py_test( ], ) +py_test( + name = "printing_test", + srcs = ["printing_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":utils", + "//tensorflow/python:client_testlib", + ], +) + py_test( name = "type_check_test", srcs = ["type_check_test.py"], diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index 838c29aafd..0a1b993fd3 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -22,5 +22,6 @@ from tensorflow.contrib.py2tf.utils.context_managers import control_dependency_o from tensorflow.contrib.py2tf.utils.misc import alias_tensors from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_cond from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while +from tensorflow.contrib.py2tf.utils.printing import call_print from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func from tensorflow.contrib.py2tf.utils.type_check import is_tensor diff --git a/tensorflow/contrib/py2tf/utils/printing.py b/tensorflow/contrib/py2tf/utils/printing.py new file mode 100644 index 0000000000..95a62bd80b --- /dev/null +++ b/tensorflow/contrib/py2tf/utils/printing.py @@ -0,0 +1,47 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TensorFlow printing support utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.py2tf.utils import py_func +from tensorflow.python.ops import logging_ops + + +def is_tf_print_compatible(value): + # TODO(mdan): Enable once we can reliably test this. + # This is currently disabled because we can't capture the output of + # op kernels from Python. + del value + return False + + +def call_print(*values): + """Compiled counterpart of the print builtin. + + The function attempts to use tf.Print if all the values are compatible. + Otherwise, it will fall back to py_func. + + Args: + *values: values to print + Returns: + A dummy value indicating the print completed. If tf. + """ + + if all(map(is_tf_print_compatible, values)): + return logging_ops.Print(1, values) + return py_func.wrap_py_func(print, None, values, use_dummy_return=True) diff --git a/tensorflow/contrib/py2tf/utils/printing_test.py b/tensorflow/contrib/py2tf/utils/printing_test.py new file mode 100644 index 0000000000..2070deb304 --- /dev/null +++ b/tensorflow/contrib/py2tf/utils/printing_test.py @@ -0,0 +1,53 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for printing module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sys + +import six + +from tensorflow.contrib.py2tf.utils import printing +from tensorflow.python.platform import test + + +class ContextManagersTest(test.TestCase): + + def test_call_print_tf(self): + try: + out_capturer = six.StringIO() + sys.stdout = out_capturer + with self.test_session() as sess: + sess.run(printing.call_print('test message', 1)) + self.assertEqual(out_capturer.getvalue(), 'test message 1\n') + finally: + sys.stdout = sys.__stdout__ + + def test_call_print_py_func(self): + try: + out_capturer = six.StringIO() + sys.stdout = out_capturer + with self.test_session() as sess: + sess.run(printing.call_print('test message', [1, 2])) + self.assertEqual(out_capturer.getvalue(), 'test message [1, 2]\n') + finally: + sys.stdout = sys.__stdout__ + + +if __name__ == '__main__': + test.main() -- GitLab From 3bf09baa29055388fd75e53e2437e2a09e57cc6e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 16:16:20 -0800 Subject: [PATCH 196/629] Test to show how TOCO fails with mismatched shapes in strided_slice. PiperOrigin-RevId: 185762074 --- .../contrib/lite/testing/generate_examples.py | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 7fe46165c7..1cf3430007 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -99,8 +99,10 @@ KNOWN_BUGS = { r"batch_to_space_nd.*crops=\[\[1,1\],\[1,1\]\]": "70594634", # BatchToSpaceND only supports 4D tensors. r"batch_to_space_nd.*input_shape=\[8,2,2,2,1,1\]": "70594733", - # Div will use floordiv - r"div.*int32": "72051395" + # Div will use floordiv. + r"div.*int32": "72051395", + # TOCO require matching dimensions in strided_slice. + r"strided_slice.*begin=\[0\].*end=\[1\].*": "73170889", } @@ -1595,6 +1597,19 @@ def make_strided_slice_tests(zip_path): "shrink_axis_mask": [None, 1, 8, 11, 15, -1], "constant_indices": [False, True], }, + # + { + "dtype": [tf.float32], + "index_type": [tf.int32], + "input_shape": [[12, 2, 2, 5]], + "begin": [[0]], + "end": [[1]], + "strides": [[1]], + "begin_mask": [0], + "end_mask": [0], + "shrink_axis_mask": [1], + "constant_indices": [True], + }, # 2-D { "dtype": [tf.float32, tf.int32, tf.int64], @@ -1610,7 +1625,7 @@ def make_strided_slice_tests(zip_path): }, # Negative strides { - "dtype": [tf.float32, tf.int32, tf.int64], + "dtype": [tf.float32], "index_type": [tf.int32], "input_shape": [[2, 3]], "begin": [[0, -1]], -- GitLab From 1290bbc3ddfda51d55f490962ed32fac1c14f225 Mon Sep 17 00:00:00 2001 From: Sandeep N Gupta <32845615+sandeepngupta@users.noreply.github.com> Date: Wed, 14 Feb 2018 16:23:57 -0800 Subject: [PATCH 197/629] New roadmap (#16984) Updating the roadmap. --- tensorflow/docs_src/about/roadmap.md | 101 ++++++++++++++++++++------- 1 file changed, 75 insertions(+), 26 deletions(-) diff --git a/tensorflow/docs_src/about/roadmap.md b/tensorflow/docs_src/about/roadmap.md index 3ee825ed40..ce9e619b10 100644 --- a/tensorflow/docs_src/about/roadmap.md +++ b/tensorflow/docs_src/about/roadmap.md @@ -1,37 +1,86 @@ # Roadmap -**Last updated: January 23, 2017** +**Last updated: Feb 12, 2018** -TensorFlow is a fast moving project. In order for the community to better -understand what the near future will bring, this document shares what we are -working on internally. Many of these features were requested by the community, -and we welcome -[contributions](https://github.com/tensorflow/tensorflow/labels/stat%3Acontributions%20welcome). +TensorFlow is a rapidly moving, community supported project. This document is intended +to provide guidance about priorities and focus areas of the core set of TensorFlow +developers and about functionality that can be expected in the upcoming releases of +TensorFlow. Many of these areas are driven by community use cases, and we welcome +further +[contributions](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md) +to TensorFlow. -The features on this list are targeted for the next few months. At this point, -we do not have timelines for these features. +The features below do not have concrete release dates. However, the majority can be +expected in the next one to two releases. -### Improve non-Python language support +### APIs +#### High Level APIs: +* Easy multi-GPU utilization with Estimators +* Easy-to-use high-level pre-made estimators for Gradient Boosted Trees, Time Series, and other models -* Support for adding gradient computation for graphs constructed in other - languages (C++, Java, Go etc.) +#### Eager Execution: +* Efficient utilization of multiple GPUs +* Distributed training (multi-machine) +* Performance improvements +* Simpler export to a GraphDef/SavedModel -### Making TensorFlow easier to use -* High-level APIs -* Well-maintained models showing best practices +#### Keras API: +* Better integration with tf.data (ability to call `model.fit` with data tensors) +* Full support for Eager execution (both Eager support for the regular Keras API, and ability +to create Keras models Eager- style via Model subclassing) +* Better distribution/multi-GPU support and TPU support (including a smoother model-to-estimator workflow) -### Performance -* Speed and memory benchmarks -* Distributed full model benchmarks -* Performance and memory usage improvements +#### Official Models: +* A set of +[reference models](https://github.com/tensorflow/models/tree/master/official) +across image recognition, speech, object detection, and + translation that demonstrate best practices and serve as a starting point for + high-performance model development. + +#### Contrib: +* Deprecation notices added to parts of tf.contrib where preferred implementations exist outside of tf.contrib. +* As much as possible, large projects inside tf.contrib moved to separate repositories. +* The tf.contrib module will eventually be discontinued in its current form, experimental development will in future happen in other repositories. -### Core Features -* Automatic op placement ([#2126](https://github.com/tensorflow/tensorflow/issues/2126)) -* Support for graph-level functions + +#### Probabilistic Reasoning and Statistical Analysis: +* Rich set of tools for probabilistic and statistical analysis in tf.distributions + and tf.probability. These include new samplers, layers, optimizers, losses, and structured models +* Statistical tools for hypothesis testing, convergence diagnostics, and sample statistics +* Edward 2.0: High-level API for probabilistic programming ### Platforms -* OpenCL support ([#22](https://github.com/tensorflow/tensorflow/issues/22)) +#### TFLite: +* Increased coverage of supported ops in TFLite +* Easier conversion of a trained TF graph for use on TFLite +* Support for GPU acceleration in TFLite (iOS and Andorid) +* Support for hardware accelerators via Android NeuralNets API +* Improved CPU performance by quantization and other network optimizations (eg. pruning, distillation) +* Increased support for devices beyond Android and iOS (eg. RPi, Cortex-M) + +### Performance +#### Distributed TensorFlow: +* Multi-GPU support optimized for a variety of GPU topologies +* Improved mechanisms for distributing computations on several machines + +#### Optimizations: +* Mixed precision training support with initial example model and guide +* Native TensorRT support +* Int8 support for SkyLake via MKL +* Dynamic loading of SIMD-optimized kernels + +### Documentation and Usability: +* Updated documentation, tutorials and Getting Started guides +* Process to enable external contributions to tutorials, documentation, and blogs showcasing best practice use-cases of TensorFlow and high-impact applications + +### Community and Partner Engagement +#### Special Interest Groups: +* Mobilizing the community to work together in focused domains +* [tf-distribute](https://groups.google.com/a/tensorflow.org/forum/#!forum/tf-distribute) +: build and packaging of TF +* More to be identified and launched -### Community -* More educational resources -* Better integration of TensorFlow into the opensource big data ecosystem (e.g. -[#2655](https://github.com/tensorflow/tensorflow/issues/2655)) +#### Community: +* Incorporate public feedback on significant design decisions via a Request-for-Comment (RFC) process +* Formalize process for external contributions to land in TensorFlow and associated projects +* Grow global TF communities and user groups +* Collaborate with partners to co-develop and publish research papers -- GitLab From 59a7de4ce8696adcd360f0c8a9fe4d5efa90e99d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 16:34:29 -0800 Subject: [PATCH 198/629] Remove dynamic shape check from Bernoulli. PiperOrigin-RevId: 185764927 --- .../python/ops/distributions/bernoulli.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/ops/distributions/bernoulli.py b/tensorflow/python/ops/distributions/bernoulli.py index 1f300b7147..4c16d62e9a 100644 --- a/tensorflow/python/ops/distributions/bernoulli.py +++ b/tensorflow/python/ops/distributions/bernoulli.py @@ -22,7 +22,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import random_ops @@ -137,21 +136,12 @@ class Bernoulli(distribution.Distribution): return (array_ops.ones_like(event) * logits, array_ops.ones_like(logits) * event) - # First check static shape. - if (event.get_shape().is_fully_defined() and - logits.get_shape().is_fully_defined()): - if event.get_shape() != logits.get_shape(): - logits, event = _broadcast(logits, event) - else: - logits, event = control_flow_ops.cond( - distribution_util.same_dynamic_shape(logits, event), - lambda: (logits, event), - lambda: _broadcast(logits, event)) + if not (event.get_shape().is_fully_defined() and + logits.get_shape().is_fully_defined() and + event.get_shape() == logits.get_shape()): + logits, event = _broadcast(logits, event) return -nn.sigmoid_cross_entropy_with_logits(labels=event, logits=logits) - def _prob(self, event): - return math_ops.exp(self._log_prob(event)) - def _entropy(self): return (-self.logits * (math_ops.sigmoid(self.logits) - 1) + nn.softplus(-self.logits)) -- GitLab From 808371b33509aa87f38e8367e68058c2eb6df4ef Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 14 Feb 2018 17:04:15 -0800 Subject: [PATCH 199/629] Removes odd stack traces from trying to delete things that aren't resources. Or at least provides a more informative error message. A guess is that these came from constructing SummaryWriter(None). PiperOrigin-RevId: 185768814 --- tensorflow/contrib/summary/summary_ops.py | 2 +- tensorflow/python/ops/resource_variable_ops.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index 068ae35c71..b6249fc92f 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -110,7 +110,7 @@ class SummaryWriter(object): def __init__(self, resource): self._resource = resource - if context.in_eager_mode(): + if context.in_eager_mode() and self._resource is not None: self._resource_deleter = resource_variable_ops.EagerResourceDeleter( handle=self._resource, handle_device="cpu:0") diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 8d6a4df6bf..25cf5aca83 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -107,6 +107,10 @@ class EagerResourceDeleter(object): """ def __init__(self, handle, handle_device): + if not isinstance(handle, ops.Tensor): + raise ValueError( + ("Passed handle=%s to EagerResourceDeleter. Was expecting a handle " + "Tensor." % (handle,))) self._handle = handle self._handle_device = handle_device -- GitLab From 5b3b689cc307632d38f08b1c7f6952bbe7a7ad7e Mon Sep 17 00:00:00 2001 From: Yangzihao Wang Date: Wed, 14 Feb 2018 17:28:18 -0800 Subject: [PATCH 200/629] Add env-var to specify whether to use CUDNN_BATCHNORM_SPATIAL_PERSISTENT for cudnn batchnorm. PiperOrigin-RevId: 185771595 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 31 ++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index b6abd42767..58b4706766 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -577,7 +577,7 @@ class ScopedFilterDescriptor { // A helper function to decide whether to enable the TENSOR_OP_MATH math type static bool TensorOpMathEnabled() { static bool is_enabled = [] { - bool is_disabled; + bool is_disabled = false; TF_CHECK_OK( tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_TENSOR_OP_MATH", /*default_val=*/false, &is_disabled)); @@ -586,6 +586,25 @@ static bool TensorOpMathEnabled() { return is_enabled; } +// A helper function to decide whether to use CUDNN_BATCHNORM_SPATIAL_PERSISTENT +// in batchnorm. This mode can be faster in some tasks because an optimized path +// may be selected for CUDNN_DATA_FLOAT and CUDNN_DATA_HALF data types, compute +// capability 6.0 or higher. The reason we set it to false by default is that +// this mode may use scaled atomic integer reduction that may cause a numerical +// overflow for certain input data range. +// TODO(yangzihao): Use autotune to choose between this mode and +// CUDNN_BATCHNORM_SPATIAL mode. +static bool BatchnormSpatialPersistentEnabled() { + static bool is_enabled = [] { + bool is_enabled = false; + TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar( + "TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT", + /*default_val=*/false, &is_enabled)); + return is_enabled; + }(); + return is_enabled; +} + // Turns a ConvolutionDescriptor structure into a cudnn convolution handle // within a scope. class ScopedConvolutionDescriptor { @@ -2773,6 +2792,11 @@ bool CudnnSupport::DoBatchNormalizationForwardImpl( ScopedTensorDescriptor scale_offset_descriptor{ parent_, scale_offset_desc, ToCudnnDataType(scale_data_type)}; cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; +#if CUDNN_VERSION >= 7000 + if (BatchnormSpatialPersistentEnabled()) { + mode = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + } +#endif float one = 1.0; float zero = 0.0; @@ -2874,6 +2898,11 @@ bool CudnnSupport::DoBatchNormalizationBackwardImpl( parent_, scale_offset_desc, static_cast(cudnn_scale_type)}; cudnnBatchNormMode_t mode = CUDNN_BATCHNORM_SPATIAL; +#if CUDNN_VERSION >= 7000 + if (BatchnormSpatialPersistentEnabled()) { + mode = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + } +#endif float one = 1.0; float zero = 0.0; -- GitLab From 61207fad1557da22a0d06bb22e35f23bd9ca4938 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 14 Feb 2018 17:51:17 -0800 Subject: [PATCH 201/629] Extract ReadOutput method to test runner PiperOrigin-RevId: 185773920 --- tensorflow/contrib/lite/testing/BUILD | 16 +++++++ tensorflow/contrib/lite/testing/join.h | 42 ++++++++++++++++++ tensorflow/contrib/lite/testing/join_test.cc | 43 +++++++++++++++++++ tensorflow/contrib/lite/testing/test_runner.h | 4 ++ .../contrib/lite/testing/test_runner_test.cc | 1 + tensorflow/contrib/lite/testing/tf_driver.cc | 11 +---- tensorflow/contrib/lite/testing/tf_driver.h | 2 +- .../contrib/lite/testing/tflite_driver.h | 1 + 8 files changed, 110 insertions(+), 10 deletions(-) create mode 100644 tensorflow/contrib/lite/testing/join.h create mode 100644 tensorflow/contrib/lite/testing/join_test.cc diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 8739ffb34c..87945353cf 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -122,6 +122,21 @@ cc_test( ], ) +cc_library( + name = "join", + hdrs = ["join.h"], +) + +cc_test( + name = "join_test", + size = "small", + srcs = ["join_test.cc"], + deps = [ + ":join", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "tflite_driver", srcs = ["tflite_driver.cc"], @@ -201,6 +216,7 @@ cc_library( srcs = ["tf_driver.cc"], hdrs = ["tf_driver.h"], deps = [ + ":join", ":split", ":test_runner", "//tensorflow/core:core_cpu", diff --git a/tensorflow/contrib/lite/testing/join.h b/tensorflow/contrib/lite/testing/join.h new file mode 100644 index 0000000000..ce8c072a21 --- /dev/null +++ b/tensorflow/contrib/lite/testing/join.h @@ -0,0 +1,42 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_TESTING_JOIN_H_ +#define TENSORFLOW_CONTRIB_LITE_TESTING_JOIN_H_ + +#include +#include +#include + +namespace tflite { +namespace testing { + +// Join a list of data separated by delimieter. +template +string Join(T* data, size_t len, const string& delimiter) { + if (len == 0 || data == nullptr) { + return ""; + } + std::stringstream result; + result << data[0]; + for (int i = 1; i < len; i++) { + result << delimiter << data[i]; + } + return result.str(); +} + +} // namespace testing +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_TESTING_JOIN_H_ diff --git a/tensorflow/contrib/lite/testing/join_test.cc b/tensorflow/contrib/lite/testing/join_test.cc new file mode 100644 index 0000000000..bd04528381 --- /dev/null +++ b/tensorflow/contrib/lite/testing/join_test.cc @@ -0,0 +1,43 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/join.h" + +#include +#include + +namespace tflite { +namespace testing { +namespace { + +TEST(JoinTest, JoinInt) { + std::vector data = {1, 2, 3}; + EXPECT_EQ(Join(data.data(), data.size(), ","), "1,2,3"); +} + +TEST(JoinTest, JoinFloat) { + float data[] = {1.0, -3, 2.3, 1e-5}; + EXPECT_EQ(Join(data, 4, " "), "1 -3 2.3 1e-05"); +} + +TEST(JoinTest, JoinNullData) { EXPECT_THAT(Join(nullptr, 3, ","), ""); } + +TEST(JoinTest, JoinZeroData) { + std::vector data; + EXPECT_THAT(Join(data.data(), 0, ","), ""); +} + +} // namespace +} // namespace testing +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/test_runner.h b/tensorflow/contrib/lite/testing/test_runner.h index 60eaafa474..05770beee2 100644 --- a/tensorflow/contrib/lite/testing/test_runner.h +++ b/tensorflow/contrib/lite/testing/test_runner.h @@ -68,6 +68,10 @@ class TestRunner { // satisfied. virtual bool CheckResults() = 0; + // Read contents of tensor into csv format. + // The given 'id' is guaranteed to be one of the ids returned by GetOutputs(). + virtual string ReadOutput(int id) = 0; + // Set the base path for loading models. void SetModelBaseDir(const string& path) { model_base_dir_ = path; diff --git a/tensorflow/contrib/lite/testing/test_runner_test.cc b/tensorflow/contrib/lite/testing/test_runner_test.cc index f712a5347a..3f04aa20bd 100644 --- a/tensorflow/contrib/lite/testing/test_runner_test.cc +++ b/tensorflow/contrib/lite/testing/test_runner_test.cc @@ -31,6 +31,7 @@ class ConcreteTestRunner : public TestRunner { void ResetTensor(int id) override {} void SetInput(int id, const string& csv_values) override {} void SetExpectation(int id, const string& csv_values) override {} + string ReadOutput(int id) override { return ""; } void Invoke() override {} bool CheckResults() override { return true; } bool CheckFloatSizes(size_t bytes, size_t values) { diff --git a/tensorflow/contrib/lite/testing/tf_driver.cc b/tensorflow/contrib/lite/testing/tf_driver.cc index da6c6ce7b1..2c253bb198 100644 --- a/tensorflow/contrib/lite/testing/tf_driver.cc +++ b/tensorflow/contrib/lite/testing/tf_driver.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include "tensorflow/contrib/lite/testing/join.h" #include "tensorflow/contrib/lite/testing/split.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -52,16 +53,8 @@ void FillTensorWithZeros(tensorflow::Tensor* tensor) { template string TensorDataToCsvString(const tensorflow::Tensor& tensor) { - std::stringstream stream; const auto& data = tensor.flat(); - if (data.size() == 0) { - return ""; - } - stream << data(0); - for (int i = 1; i < data.size(); i++) { - stream << "," << data(i); - } - return stream.str(); + return Join(data.data(), data.size(), ","); } } // namespace diff --git a/tensorflow/contrib/lite/testing/tf_driver.h b/tensorflow/contrib/lite/testing/tf_driver.h index 2928e57282..b766f85c4d 100644 --- a/tensorflow/contrib/lite/testing/tf_driver.h +++ b/tensorflow/contrib/lite/testing/tf_driver.h @@ -41,7 +41,7 @@ class TfDriver : public TestRunner { void LoadModel(const string& bin_file_path) override; void SetInput(int id, const string& csv_values) override; void Invoke() override; - string ReadOutput(int id); + string ReadOutput(int id) override; const std::vector& GetInputs() override { return input_ids_; } const std::vector& GetOutputs() override { return output_ids_; } diff --git a/tensorflow/contrib/lite/testing/tflite_driver.h b/tensorflow/contrib/lite/testing/tflite_driver.h index 25689a9fb4..02b7de1534 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.h +++ b/tensorflow/contrib/lite/testing/tflite_driver.h @@ -45,6 +45,7 @@ class TfLiteDriver : public TestRunner { void SetExpectation(int id, const string& csv_values) override; void Invoke() override; bool CheckResults() override; + string ReadOutput(int id) override { return "no-op"; } private: class Expectation; -- GitLab From febed14f36eef0d3800d891b18717df36f786852 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Wed, 14 Feb 2018 18:38:44 -0800 Subject: [PATCH 202/629] Fix the build hdrs dependency for gpu_id. Reported by #16976. PiperOrigin-RevId: 185778815 --- tensorflow/core/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index d1fb9f4445..8eb5c11969 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2272,6 +2272,8 @@ GPU_RUNTIME_HEADERS = [ "common_runtime/gpu/gpu_cudamalloc_allocator.h", "common_runtime/gpu/gpu_debug_allocator.h", "common_runtime/gpu/gpu_device.h", + "common_runtime/gpu/gpu_id.h", + "common_runtime/gpu/gpu_id_manager.h", "common_runtime/gpu/gpu_id_utils.h", "common_runtime/gpu/gpu_init.h", "common_runtime/gpu/gpu_managed_allocator.h", -- GitLab From 109d7af263e927e6be4d596dfc37676d8dec5463 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 14 Feb 2018 18:40:31 -0800 Subject: [PATCH 203/629] Internal-only change. PiperOrigin-RevId: 185778955 --- tensorflow/compiler/xla/tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 5ff7740752..8339d08ef4 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -586,6 +586,7 @@ xla_test( tags = [ "enormous", "manual", + "notap", ], deps = [ ":client_library_test_base", -- GitLab From 49f73c55d56edffebde4bca4a407ad69c1cae433 Mon Sep 17 00:00:00 2001 From: "David G. Andersen" Date: Wed, 14 Feb 2018 18:56:47 -0800 Subject: [PATCH 204/629] Fix integer overflow in BMP decoder by making the checks in DecodeBmp more stringent. Add fuzzer to improve the robustness of the decoder in the future. PiperOrigin-RevId: 185780111 --- tensorflow/core/kernels/decode_bmp_op.cc | 27 +++++++++++++---- tensorflow/core/kernels/fuzzing/BUILD | 2 ++ .../core/kernels/fuzzing/decode_bmp_fuzz.cc | 29 +++++++++++++++++++ 3 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 tensorflow/core/kernels/fuzzing/decode_bmp_fuzz.cc diff --git a/tensorflow/core/kernels/decode_bmp_op.cc b/tensorflow/core/kernels/decode_bmp_op.cc index b7d120a617..b4dcf0a74b 100644 --- a/tensorflow/core/kernels/decode_bmp_op.cc +++ b/tensorflow/core/kernels/decode_bmp_op.cc @@ -91,15 +91,32 @@ class DecodeBmpOp : public OpKernel { errors::InvalidArgument( "Number of channels must be 1, 3 or 4, was ", channels_)); + OP_REQUIRES(context, width > 0 && header_size >= 0, + errors::InvalidArgument("Width must be positive")); + OP_REQUIRES(context, header_size >= 0, + errors::InvalidArgument("header size must be nonnegative")); + + // The real requirement is < 2^31 minus some headers and channel data, + // so rounding down to something that's still ridiculously big. + OP_REQUIRES( + context, + (static_cast(width) * std::abs(static_cast(height))) < + static_cast(std::numeric_limits::max() / 8), + errors::InvalidArgument( + "Total possible pixel bytes must be less than 2^30")); + + const int32 abs_height = abs(height); + // there may be padding bytes when the width is not a multiple of 4 bytes // 8 * channels == bits per pixel const int row_size = (8 * channels_ * width + 31) / 32 * 4; - const int last_pixel_offset = - header_size + (abs(height) - 1) * row_size + (width - 1) * channels_; + const int64 last_pixel_offset = static_cast(header_size) + + (abs_height - 1) * row_size + + (width - 1) * channels_; // [expected file size] = [last pixel offset] + [last pixel size=channels] - const int expected_file_size = last_pixel_offset + channels_; + const int64 expected_file_size = last_pixel_offset + channels_; OP_REQUIRES( context, (expected_file_size <= input.size()), @@ -115,12 +132,12 @@ class DecodeBmpOp : public OpKernel { Tensor* output = nullptr; OP_REQUIRES_OK( context, context->allocate_output( - 0, TensorShape({abs(height), width, channels_}), &output)); + 0, TensorShape({abs_height, width, channels_}), &output)); const uint8* bmp_pixels = &img_bytes[header_size]; Decode(bmp_pixels, row_size, output->flat().data(), width, - abs(height), channels_, top_down); + abs_height, channels_, top_down); } uint8* Decode(const uint8* input, const int row_size, uint8* const output, diff --git a/tensorflow/core/kernels/fuzzing/BUILD b/tensorflow/core/kernels/fuzzing/BUILD index 41af950d7d..9a7eca03ce 100644 --- a/tensorflow/core/kernels/fuzzing/BUILD +++ b/tensorflow/core/kernels/fuzzing/BUILD @@ -43,6 +43,8 @@ tf_ops_fuzz_target_lib("decode_base64") tf_ops_fuzz_target_lib("encode_jpeg") +tf_ops_fuzz_target_lib("decode_bmp") + tf_ops_fuzz_target_lib("decode_png") tf_ops_fuzz_target_lib("decode_jpeg") diff --git a/tensorflow/core/kernels/fuzzing/decode_bmp_fuzz.cc b/tensorflow/core/kernels/fuzzing/decode_bmp_fuzz.cc new file mode 100644 index 0000000000..01c56ac6f6 --- /dev/null +++ b/tensorflow/core/kernels/fuzzing/decode_bmp_fuzz.cc @@ -0,0 +1,29 @@ +/* Copyright 2018 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/kernels/fuzzing/fuzz_session.h" + +namespace tensorflow { +namespace fuzzing { + +class FuzzDecodeBmp : public FuzzStringInputOp { + SINGLE_INPUT_OP_BUILDER(DT_STRING, DecodeBmp); +}; + +STANDARD_TF_FUZZ_FUNCTION(FuzzDecodeBmp); + +} // end namespace fuzzing +} // end namespace tensorflow -- GitLab From e5e03ef3148303b3dfed89a1492dedf92b45be25 Mon Sep 17 00:00:00 2001 From: Jiongyan Zhang Date: Thu, 15 Feb 2018 11:00:38 +0800 Subject: [PATCH 205/629] Make get_placeholders() accessible and add example (#15440) * make get_placeholders() accessible and add example * add test * Revert "add test" This reverts commit 9e1f4c73afdcc8c4b3952a628ab4935c77d18659. * Update comment. --- tensorflow/contrib/framework/__init__.py | 2 ++ .../contrib/framework/python/framework/graph_util.py | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index fb101c3653..a49d42cd52 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -85,6 +85,8 @@ See the @{$python/contrib.framework} guide. @@py_func @@sort +@@get_placeholders + @@CriticalSection @@BoundedTensorSpec diff --git a/tensorflow/contrib/framework/python/framework/graph_util.py b/tensorflow/contrib/framework/python/framework/graph_util.py index a18ff2320d..49eec3a3f1 100644 --- a/tensorflow/contrib/framework/python/framework/graph_util.py +++ b/tensorflow/contrib/framework/python/framework/graph_util.py @@ -133,6 +133,18 @@ def fuse_op(graph_def, input_nodes, output_nodes, output_dtypes, def get_placeholders(graph): """Get placeholders of a graph. + For example: + + ```python + a = tf.placeholder(dtype=tf.float32, shape=[2, 2], name='a') + a = tf.placeholder(dtype=tf.int32, shape=[3, 2], name='b') + + tf.contrib.framework.get_placeholders(tf.get_default_graph()) + # Returns: + # [, + # ] + ``` + Args: graph: A tf.Graph. Returns: -- GitLab From c71f331ae3f96a7a849da6b5901d42a695173b7f Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Wed, 14 Feb 2018 19:04:12 -0800 Subject: [PATCH 206/629] Error out if user provided num_shards is incorrect for non-model-parallelism case. PiperOrigin-RevId: 185780685 --- tensorflow/contrib/tpu/python/tpu/tpu_context.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_context.py b/tensorflow/contrib/tpu/python/tpu/tpu_context.py index 344ff9a37f..c5c46ea741 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_context.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_context.py @@ -425,14 +425,7 @@ class _TPUContext(object): self._get_master_address(), num_replicas, user_provided_num_replicas)) - if self.model_parallelism_enabled: - raise ValueError(message) - else: - logging.warning(message) - logging.warning( - 'For non-model-parallelism, TPUEstimator currently ' - 'automatically queries the TPU system information so ignores ' - 'this field.') + raise ValueError(message) if mode == model_fn_lib.ModeKeys.TRAIN: if self._train_batch_size % num_replicas != 0: -- GitLab From 5436148058543e50383a19db3ad1cd8b20889dcc Mon Sep 17 00:00:00 2001 From: Yao Zhang Date: Wed, 14 Feb 2018 19:35:36 -0800 Subject: [PATCH 207/629] Fix a bug to update reduction axes for all supported cases. Turn off layout optimizer for all reference runs, as it is on by default now. PiperOrigin-RevId: 185782777 --- .../grappler/optimizers/layout_optimizer.cc | 2 +- .../python/grappler/layout_optimizer_test.py | 128 +++++++++++++----- 2 files changed, 94 insertions(+), 36 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index a606f972ac..826f00209b 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -1794,7 +1794,7 @@ class ReduceProcessor : public AgnosticNodeProcessor { } Status CustomizedProcessing() override { - if (IsAlongNHW() || IsAlongHW() || IsAlongC()) { + if (IsReduceAxisSupported()) { DataType dtype = node_->attr().at("Tidx").type(); TF_RETURN_IF_ERROR( UpdateOrTransformParamInput(1, "DataFormatDimMap", dtype)); diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index b04bbb0daa..0f51501740 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -256,7 +256,7 @@ class LayoutOptimizerTest(test.TestCase): x = random_ops.truncated_normal([1, 784], seed=0) output = _two_layer_model(x) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -293,7 +293,7 @@ class LayoutOptimizerTest(test.TestCase): add = bn0[0] + bn1[0] output = array_ops.identity(add) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={dim: 3}) with session.Session(config=_get_config()) as sess: @@ -325,7 +325,7 @@ class LayoutOptimizerTest(test.TestCase): value=conv, size_splits=sizes, axis=dim, num_split=3) output = math_ops.reduce_sum(split[0]) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={dim: 3}) with session.Session(config=_get_config()) as sess: @@ -359,7 +359,7 @@ class LayoutOptimizerTest(test.TestCase): pad = array_ops.pad(conv, paddings) output = array_ops.identity(pad) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -390,7 +390,7 @@ class LayoutOptimizerTest(test.TestCase): reduce_sum = math_ops.reduce_sum(conv) output = array_ops.identity(reduce_sum) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -419,7 +419,7 @@ class LayoutOptimizerTest(test.TestCase): cast = math_ops.cast(conv, dtype='bool') output = array_ops.identity(cast) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -450,7 +450,7 @@ class LayoutOptimizerTest(test.TestCase): squeeze = array_ops.squeeze(reduce_sum) output = array_ops.identity(squeeze) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -480,7 +480,7 @@ class LayoutOptimizerTest(test.TestCase): squeeze = array_ops.squeeze(reduce_sum, axis=[1, 2]) output = array_ops.identity(squeeze) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -510,7 +510,7 @@ class LayoutOptimizerTest(test.TestCase): squeeze = array_ops.squeeze(reduce_sum, axis=[0, 1, 2]) output = array_ops.identity(squeeze) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -539,7 +539,7 @@ class LayoutOptimizerTest(test.TestCase): reduce_sum = math_ops.reduce_sum(conv, axis=[1, 2, 3]) output = array_ops.identity(reduce_sum) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -568,7 +568,7 @@ class LayoutOptimizerTest(test.TestCase): reduce_sum = math_ops.reduce_sum(conv, axis=[0, 1, 2]) output = array_ops.identity(reduce_sum) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -597,7 +597,7 @@ class LayoutOptimizerTest(test.TestCase): reduce_sum = math_ops.reduce_sum(conv, axis=[3]) output = array_ops.identity(reduce_sum) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -626,7 +626,7 @@ class LayoutOptimizerTest(test.TestCase): reduce_sum = math_ops.reduce_sum(conv, axis=[3], keep_dims=True) output = array_ops.identity(reduce_sum) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -648,6 +648,64 @@ class LayoutOptimizerTest(test.TestCase): self._assert_trans_nchw_to_nhwc('Sum-0-0', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) + def testReduceSumAlongHKeepDims(self): + if test.is_gpu_available(cuda_only=True): + random_seed.set_random_seed(0) + x = random_ops.truncated_normal([1, 784], seed=0) + conv = _two_layer_model(x) + reduce_sum = math_ops.reduce_sum(conv, axis=[2], keep_dims=True) + output = array_ops.identity(reduce_sum) + + with session.Session(config=_get_config(False)) as sess: + output_val_ref = sess.run(output) + + with session.Session(config=_get_config()) as sess: + metadata = config_pb2.RunMetadata() + output_val = sess.run(output, run_metadata=metadata) + + nodes = [] + num_transposes = 0 + for node in metadata.cost_graph.node: + if _is_transpose(node.name): + num_transposes += 1 + nodes.append(node.name) + + # Four transposes were initially added in the Expand phase of + # LayoutOptimizer; two of them are cancelled out in the Collapse phase. + expected_num_transposes = 2 + self.assertEqual(expected_num_transposes, num_transposes) + self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) + self.assertAllClose(output_val_ref, output_val, atol=1e-3) + + def testReduceSumAlongWCKeepDims(self): + if test.is_gpu_available(cuda_only=True): + random_seed.set_random_seed(0) + x = random_ops.truncated_normal([1, 784], seed=0) + conv = _two_layer_model(x) + reduce_sum = math_ops.reduce_sum(conv, axis=[2, 3], keep_dims=True) + output = array_ops.identity(reduce_sum) + + with session.Session(config=_get_config(False)) as sess: + output_val_ref = sess.run(output) + + with session.Session(config=_get_config()) as sess: + metadata = config_pb2.RunMetadata() + output_val = sess.run(output, run_metadata=metadata) + + nodes = [] + num_transposes = 0 + for node in metadata.cost_graph.node: + if _is_transpose(node.name): + num_transposes += 1 + nodes.append(node.name) + + # Four transposes were initially added in the Expand phase of + # LayoutOptimizer; two of them are cancelled out in the Collapse phase. + expected_num_transposes = 2 + self.assertEqual(expected_num_transposes, num_transposes) + self._assert_trans_nhwc_to_nchw('Conv2D-0', nodes) + self.assertAllClose(output_val_ref, output_val, atol=1e-3) + def testConcatWithControlDependency(self): if test.is_gpu_available(cuda_only=True): random_seed.set_random_seed(0) @@ -660,7 +718,7 @@ class LayoutOptimizerTest(test.TestCase): concat = array_ops.concat([conv, conv], axis) output = array_ops.identity(concat) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -694,7 +752,7 @@ class LayoutOptimizerTest(test.TestCase): output = array_ops.identity(fill) x_val = [3.4] * 784 - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={x: x_val}) with session.Session(config=_get_config()) as sess: @@ -736,7 +794,7 @@ class LayoutOptimizerTest(test.TestCase): output = array_ops.identity(tile) multiple_val = [2, 3, 4, 1] - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={multiple: multiple_val}) with session.Session(config=_get_config()) as sess: @@ -771,7 +829,7 @@ class LayoutOptimizerTest(test.TestCase): reverse = array_ops.reverse(conv, dims) output = array_ops.identity(reverse) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -804,7 +862,7 @@ class LayoutOptimizerTest(test.TestCase): output = array_ops.identity(reverse) dims_val = [2, 3] - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={dims: dims_val}) with session.Session(config=_get_config()) as sess: @@ -841,7 +899,7 @@ class LayoutOptimizerTest(test.TestCase): select = gen_math_ops._select(condition, conv, add) output = array_ops.identity(select) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -872,7 +930,7 @@ class LayoutOptimizerTest(test.TestCase): output = array_ops.identity(select) condition_val = np.zeros((1, 7, 7, 64)) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={condition: condition_val}) with session.Session(config=_get_config()) as sess: @@ -902,7 +960,7 @@ class LayoutOptimizerTest(test.TestCase): select = gen_math_ops._select(condition, conv, add) output = array_ops.identity(select) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -932,7 +990,7 @@ class LayoutOptimizerTest(test.TestCase): output = array_ops.identity(pad) paddings_val = [[1, 2], [3, 4], [5, 6], [7, 8]] - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={paddings: paddings_val}) with session.Session(config=_get_config()) as sess: @@ -969,7 +1027,7 @@ class LayoutOptimizerTest(test.TestCase): output = array_ops.identity(max_pool) strides_val = [1, 3, 2, 1] - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={strides: strides_val}) with session.Session(config=_get_config()) as sess: @@ -1006,7 +1064,7 @@ class LayoutOptimizerTest(test.TestCase): output = array_ops.identity(max_pool_grad) strides_val = [1, 3, 2, 1] - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={strides: strides_val}) with session.Session(config=_get_config()) as sess: @@ -1041,7 +1099,7 @@ class LayoutOptimizerTest(test.TestCase): output = array_ops.identity(s) size_val = [1, 2, 3, 4] - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={size: size_val}) with session.Session(config=_get_config()) as sess: @@ -1077,7 +1135,7 @@ class LayoutOptimizerTest(test.TestCase): output = array_ops.identity(s) end_val = [1, 2, 3, 4] - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={end: end_val}) with session.Session(config=_get_config()) as sess: @@ -1115,7 +1173,7 @@ class LayoutOptimizerTest(test.TestCase): s = conv[:, :, 1:-1, :] output = array_ops.identity(s) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -1150,7 +1208,7 @@ class LayoutOptimizerTest(test.TestCase): s = conv[:, :, :, 1:-1] output = array_ops.identity(s) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -1189,7 +1247,7 @@ class LayoutOptimizerTest(test.TestCase): [1, 2, 3, 1], s) output = array_ops.identity(s_grad) - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={end: end_val}) with session.Session(config=_get_config()) as sess: @@ -1225,7 +1283,7 @@ class LayoutOptimizerTest(test.TestCase): output = math_ops.add(shapen[0], shapen[1]) x_val = [1.7] * 784 - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={x: x_val}) with session.Session(config=_get_config()) as sess: @@ -1259,7 +1317,7 @@ class LayoutOptimizerTest(test.TestCase): output = math_ops.add_n([conv_reshape, ones]) x_val = [1.7] * 784 - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output, feed_dict={x: x_val}) with session.Session(config=_get_config()) as sess: @@ -1283,7 +1341,7 @@ class LayoutOptimizerTest(test.TestCase): if test.is_gpu_available(cuda_only=True): output = _loop() - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -1310,7 +1368,7 @@ class LayoutOptimizerTest(test.TestCase): if test.is_gpu_available(cuda_only=True): output = _loop_with_branch() - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -1334,7 +1392,7 @@ class LayoutOptimizerTest(test.TestCase): if test.is_gpu_available(cuda_only=True): output = _loop_with_vec_and_4d() - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: @@ -1358,7 +1416,7 @@ class LayoutOptimizerTest(test.TestCase): if test.is_gpu_available(cuda_only=True): output = _model_with_second_port() - with session.Session() as sess: + with session.Session(config=_get_config(False)) as sess: output_val_ref = sess.run(output) with session.Session(config=_get_config()) as sess: -- GitLab From 7967ccf3d465815ea0036069a96a649b4ca9d592 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Wed, 14 Feb 2018 21:41:56 -0800 Subject: [PATCH 208/629] tf.image.resize_bilinear gradient support for float16 The required kernel changes were implemented almost 2 years ago in 80da0a63200cb7c9c449188620992c7a8d18c8b9, but we forgot to change the Python gradient registry. PiperOrigin-RevId: 185790703 --- tensorflow/python/ops/image_grad.py | 13 +++------- tensorflow/python/ops/image_grad_test.py | 32 +++++++++++++++--------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/tensorflow/python/ops/image_grad.py b/tensorflow/python/ops/image_grad.py index d17f1a87d9..093843cd5b 100644 --- a/tensorflow/python/ops/image_grad.py +++ b/tensorflow/python/ops/image_grad.py @@ -61,15 +61,10 @@ def _ResizeBilinearGrad(op, grad): Returns: The gradients w.r.t. the input. """ - allowed_types = [dtypes.float32, dtypes.float64] - grad0 = None - if op.inputs[0].dtype in allowed_types: - # pylint: disable=protected-access - grad0 = gen_image_ops._resize_bilinear_grad( - grad, - op.inputs[0], - align_corners=op.get_attr("align_corners")) - # pylint: enable=protected-access + # pylint: disable=protected-access + grad0 = gen_image_ops._resize_bilinear_grad( + grad, op.inputs[0], align_corners=op.get_attr("align_corners")) + # pylint: enable=protected-access return [grad0, None] diff --git a/tensorflow/python/ops/image_grad_test.py b/tensorflow/python/ops/image_grad_test.py index 05e8fa1d72..75d00c8ed1 100644 --- a/tensorflow/python/ops/image_grad_test.py +++ b/tensorflow/python/ops/image_grad_test.py @@ -142,18 +142,6 @@ class ResizeBilinearOpTest(test.TestCase): input_tensor, in_shape, resize_out, out_shape, x_init_value=x) self.assertLess(err, 1e-3) - def testGradOnUnsupportedType(self): - in_shape = [1, 4, 6, 1] - out_shape = [1, 2, 3, 1] - - x = np.arange(0, 24).reshape(in_shape).astype(np.uint8) - - with self.test_session(): - input_tensor = constant_op.constant(x, shape=in_shape) - resize_out = image_ops.resize_bilinear(input_tensor, out_shape[1:3]) - grad = gradients_impl.gradients(input_tensor, [resize_out]) - self.assertEqual([None], grad) - def testCompareGpuVsCpu(self): in_shape = [2, 4, 6, 3] out_shape = [2, 8, 16, 3] @@ -172,6 +160,26 @@ class ResizeBilinearOpTest(test.TestCase): self.assertAllClose(grad[False], grad[True], rtol=1e-4, atol=1e-4) + def testTypes(self): + in_shape = [1, 4, 6, 1] + out_shape = [1, 2, 3, 1] + x = np.arange(0, 24).reshape(in_shape) + + with self.test_session() as sess: + for dtype in [np.float16, np.float32, np.float64]: + input_tensor = constant_op.constant(x.astype(dtype), shape=in_shape) + resize_out = image_ops.resize_bilinear(input_tensor, out_shape[1:3]) + grad = sess.run(gradients_impl.gradients(resize_out, input_tensor))[0] + self.assertAllEqual(in_shape, grad.shape) + # Not using gradient_checker.compute_gradient as I didn't work out + # the changes required to compensate for the lower precision of + # float16 when computing the numeric jacobian. + # Instead, we just test the theoretical jacobian. + self.assertAllEqual([[[[1.], [0.], [1.], [0.], [1.], [0.]], [[0.], [ + 0. + ], [0.], [0.], [0.], [0.]], [[1.], [0.], [1.], [0.], [1.], [0.]], + [[0.], [0.], [0.], [0.], [0.], [0.]]]], grad) + class ResizeBicubicOpTest(test.TestCase): -- GitLab From bcb940dfc2987c3020045e559a004b3179ec1c41 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 15 Feb 2018 00:26:10 -0800 Subject: [PATCH 209/629] Java: Release 1.6.0-rc1 PiperOrigin-RevId: 185801747 --- tensorflow/java/maven/libtensorflow/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml | 2 +- tensorflow/java/maven/pom.xml | 2 +- tensorflow/java/maven/proto/pom.xml | 2 +- tensorflow/java/maven/tensorflow/pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml index 99add51069..d35bb41112 100644 --- a/tensorflow/java/maven/libtensorflow/pom.xml +++ b/tensorflow/java/maven/libtensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc0 + 1.6.0-rc1 ../ libtensorflow diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml index 7bb9879f68..d9ba1bbbfb 100644 --- a/tensorflow/java/maven/libtensorflow_jni/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc0 + 1.6.0-rc1 ../ libtensorflow_jni diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml index 268e1bae1f..f6f532c2c1 100644 --- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc0 + 1.6.0-rc1 ../ libtensorflow_jni_gpu diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 6a3abcbc11..0a6b3d23d7 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.6.0-rc0 + 1.6.0-rc1 pom https://www.tensorflow.org diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml index 54a4fd577a..1d8e872373 100644 --- a/tensorflow/java/maven/proto/pom.xml +++ b/tensorflow/java/maven/proto/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc0 + 1.6.0-rc1 ../ proto diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml index 76e0fecae4..5c1b55085c 100644 --- a/tensorflow/java/maven/tensorflow/pom.xml +++ b/tensorflow/java/maven/tensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc0 + 1.6.0-rc1 ../ tensorflow -- GitLab From 3177a76bcb9a2c1166aa9d7e9fbb76d0fef1b6e3 Mon Sep 17 00:00:00 2001 From: Seungil You Date: Thu, 15 Feb 2018 18:34:10 +0900 Subject: [PATCH 210/629] Add clean_dep to tf_cc_test. --- tensorflow/tensorflow.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 2ead85d26d..818d67f7b5 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -618,7 +618,7 @@ def tf_cc_test(name, srcs=srcs + tf_binary_additional_srcs(), copts=tf_copts() + extra_copts, linkopts=select({ - "//tensorflow:android": [ + clean_dep("//tensorflow:android"): [ "-pie", ], clean_dep("//tensorflow:windows"): [], -- GitLab From 69d8bec1388c306b90dd9f66098a882e668435f4 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 15 Feb 2018 04:12:20 -0800 Subject: [PATCH 211/629] Fix "cudnn64_7.dll" in install_windows.md PiperOrigin-RevId: 185818395 --- tensorflow/docs_src/install/install_windows.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index 6cc5b92305..e020451c04 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -47,7 +47,7 @@ installed on your system: If you have a different version of one of the preceding packages, please change to the specified versions. In particular, the cuDNN version -must match exactly: TensorFlow will not load if it cannot find `cuDNN64_6.dll`. +must match exactly: TensorFlow will not load if it cannot find `cudnn64_7.dll`. To use a different version of cuDNN, you must build from source. ## Determine how to install TensorFlow -- GitLab From bcc50a14989335de4ebfe3327a5fc8baa17f5465 Mon Sep 17 00:00:00 2001 From: Donny Viszneki Date: Thu, 15 Feb 2018 05:10:26 -0800 Subject: [PATCH 212/629] conv1d doc string misnames first argument The docs say `conv2d()`'s first argument is `input` which is maybe how the docstring for `conv1d()` ended up saying `input` instead of `value` when describing the `filters` argument. --- tensorflow/python/ops/nn_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 47f48a7e16..2a00005bd7 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2380,7 +2380,7 @@ def conv1d(value, Args: value: A 3D `Tensor`. Must be of type `float16` or `float32`. - filters: A 3D `Tensor`. Must have the same type as `input`. + filters: A 3D `Tensor`. Must have the same type as `value`. stride: An `integer`. The number of entries by which the filter is moved right at each step. padding: 'SAME' or 'VALID' -- GitLab From df803bd35bef6bfa1c145d135e06b4054311ef0b Mon Sep 17 00:00:00 2001 From: Naman Kamra Date: Thu, 15 Feb 2018 17:51:49 +0400 Subject: [PATCH 213/629] fixed typo in docstring for unchanged shape method --- tensorflow/python/framework/common_shapes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/common_shapes.py b/tensorflow/python/framework/common_shapes.py index 3b1092f923..3c5aebbce8 100644 --- a/tensorflow/python/framework/common_shapes.py +++ b/tensorflow/python/framework/common_shapes.py @@ -34,7 +34,7 @@ def scalar_shape(unused_op): def unchanged_shape(op): - """Shape function for ops that output an tensor like their first input.""" + """Shape function for ops that output a tensor like their first input.""" return [op.inputs[0].get_shape()] -- GitLab From cf74c749aa9f7fb8eabb4a254c4f53cc2dbadae3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 06:52:26 -0800 Subject: [PATCH 214/629] Optimize away multiply by constant zero PiperOrigin-RevId: 185831862 --- tensorflow/contrib/lite/toco/BUILD | 1 + .../graph_transformations.h | 1 + .../resolve_multiply_by_zero.cc | 152 ++++++++++++++++++ tensorflow/contrib/lite/toco/toco_tooling.cc | 1 + 4 files changed, 155 insertions(+) create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 45031de09c..e2879fad32 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -224,6 +224,7 @@ cc_library( "graph_transformations/resolve_constant_transpose.cc", "graph_transformations/resolve_constant_unary.cc", "graph_transformations/resolve_mean_attributes.cc", + "graph_transformations/resolve_multiply_by_zero.cc", "graph_transformations/resolve_pad_attributes.cc", "graph_transformations/resolve_reorder_axes.cc", "graph_transformations/resolve_reshape_attributes.cc", diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index 3ab01ae643..616bdac268 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -174,6 +174,7 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveConstantShapeOrRank) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStack) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStridedSlice) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFill) +DECLARE_GRAPH_TRANSFORMATION(ResolveMultiplyByZero) DECLARE_GRAPH_TRANSFORMATION(Dequantize) class ResolveReshapeAttributes : public GraphTransformation { diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc new file mode 100644 index 0000000000..37beb41dfc --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc @@ -0,0 +1,152 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +namespace { + +template +bool AreAllBufferElementsZero(const std::vector& buffer_data) { + for (auto x : buffer_data) { + if (x != 0) { + return false; + } + } + return true; +} + +template +void FillArrayWithZeros(Array* array) { + CHECK(array->data_type == Type); + std::vector>& data = array->GetMutableBuffer().data; + data.resize(RequiredBufferSizeForShape(array->shape())); + for (size_t i = 0; i < data.size(); i++) { + data[i] = 0; + } +} + +} // namespace + +// Removes a multiplication by array of constant zeros by making the output +// array an array of constant zeros and removing the input arrays if they are no +// longer needed. +bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { + const auto mul_it = model->operators.begin() + op_index; + auto* mul_op = mul_it->get(); + if (mul_op->type != OperatorType::kMul) { + return false; + } + const auto& output_array_name = mul_op->outputs[0]; + auto& output_array = model->GetArray(output_array_name); + + // Yield if the output shape is not known yet. + if (!output_array.has_shape()) { + return false; + } + + // This transformation only handles the case where one operand is all 0's and + // the other is non-constant. Other cases are handled by constant propagation + // or the trivial binary removal pass. + const bool is_input_constant[2] = { + IsConstantParameterArray(*model, mul_op->inputs[0]), + IsConstantParameterArray(*model, mul_op->inputs[1]), + }; + if (!is_input_constant[0] && !is_input_constant[1]) { + // Neither input is constant, so nothing we can resolve here. + return false; + } + if (is_input_constant[0] && is_input_constant[1]) { + // Both inputs are constants. That's a job for constants propagation, not + // for us to handle here. + return false; + } + const int index_of_constant_input = is_input_constant[0] ? 0 : 1; + const int index_of_variable_input = is_input_constant[0] ? 1 : 0; + CHECK(is_input_constant[index_of_constant_input]); + CHECK(!is_input_constant[index_of_variable_input]); + + const auto& constant_input_array = + model->GetArray(mul_op->inputs[index_of_constant_input]); + + CHECK(constant_input_array.data_type == output_array.data_type); + switch (output_array.data_type) { + case ArrayDataType::kFloat: { + const auto& constant_input_data = + constant_input_array.GetBuffer().data; + if (!AreAllBufferElementsZero>( + constant_input_data)) { + return false; + } + FillArrayWithZeros(&output_array); + } break; + case ArrayDataType::kUint8: { + const auto& constant_input_data = + constant_input_array.GetBuffer().data; + if (!AreAllBufferElementsZero>( + constant_input_data)) { + return false; + } + FillArrayWithZeros(&output_array); + } break; + case ArrayDataType::kInt32: { + const auto& constant_input_data = + constant_input_array.GetBuffer().data; + if (!AreAllBufferElementsZero>( + constant_input_data)) { + return false; + } + FillArrayWithZeros(&output_array); + } break; + case ArrayDataType::kInt64: { + const auto& constant_input_data = + constant_input_array.GetBuffer().data; + if (!AreAllBufferElementsZero>( + constant_input_data)) { + return false; + } + FillArrayWithZeros(&output_array); + } break; + default: + AddMessageF( + "Cannot resolve multiply by 0 because of unsupported data type\n"); + return false; + } + + // Erase input arrays to the multiply if no longer used + if (IsDiscardableArray(*model, mul_op->inputs[0]) && + CountOpsWithInput(*model, mul_op->inputs[0]) == 1) { + model->EraseArray(mul_op->inputs[0]); + } + if (IsDiscardableArray(*model, mul_op->inputs[1]) && + CountOpsWithInput(*model, mul_op->inputs[1]) == 1) { + model->EraseArray(mul_op->inputs[1]); + } + + // Erase the multiply operator. + model->operators.erase(mul_it); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index 5472c52c96..864c646a8c 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -86,6 +86,7 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new ResolveTensorFlowSwitch); transformations->Add(new ResolveTensorFlowTile); transformations->Add(new ResolveTensorFlowConcat); + transformations->Add(new ResolveMultiplyByZero); transformations->Add(new IdentifyL2Normalization); transformations->Add(new IdentifyL2Pool); transformations->Add(new IdentifyRelu1); -- GitLab From 6e3f0c30db478ae4dc96690c64d1f52e15de5d23 Mon Sep 17 00:00:00 2001 From: fo40225 Date: Fri, 16 Feb 2018 00:12:26 +0800 Subject: [PATCH 215/629] fix msvc error C3016 --- tensorflow/core/kernels/slice_op.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index 79369fd4a9..77594479cb 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -358,11 +358,11 @@ class MklSliceOp : public OpKernel { /* data format = NCHW */ #pragma omp parallel for - for (size_t d0 = begin[0]; d0 < begin[0] + size[0]; d0++) { + for (ssize_t d0 = begin[0]; d0 < begin[0] + size[0]; d0++) { T* ip = in_buf + (d0 * in_strides[0]); T* op = op_buf + ((d0 - begin[0]) * out_strides[0]); #pragma omp parallel for - for (size_t d1 = begin[1]; d1 < begin[1] + size[1]; d1++) { + for (ssize_t d1 = begin[1]; d1 < begin[1] + size[1]; d1++) { T* ip1 = ip + (d1 * in_strides[1]); T* op1 = op + ((d1 - begin[1]) * out_strides[1]); // For NCHW, H and W will be contiguous. So we can copy @@ -376,15 +376,15 @@ class MklSliceOp : public OpKernel { /* data_format = NHWC */ #pragma omp parallel for - for (size_t d0 = begin[0]; d0 < begin[0] + size[0]; d0++) { + for (ssize_t d0 = begin[0]; d0 < begin[0] + size[0]; d0++) { T* ip = in_buf + (d0 * in_strides[0]); T* op = op_buf + ((d0 - begin[0]) * out_strides[0]); #pragma omp parallel for - for (size_t d1 = begin[1]; d1 < begin[1] + size[1]; d1++) { + for (ssize_t d1 = begin[1]; d1 < begin[1] + size[1]; d1++) { T* ip1 = ip + (d1 * in_strides[1]); T* op1 = op + ((d1 - begin[1]) * out_strides[1]); #pragma omp parallel for - for (size_t d2 = begin[2]; d2 < begin[2] + size[2]; d2++) { + for (ssize_t d2 = begin[2]; d2 < begin[2] + size[2]; d2++) { T* ip2 = ip1 + (d2 * in_strides[2]); T* ip3 = ip2 + begin[3]; T* op2 = op1 + ((d2 - begin[2]) * out_strides[2]); -- GitLab From 25d2682d88a3ee535be133133a1132ec79d65c5f Mon Sep 17 00:00:00 2001 From: fo40225 Date: Fri, 16 Feb 2018 00:15:01 +0800 Subject: [PATCH 216/629] remove unused openmp code --- tensorflow/core/kernels/xsmm_conv2d.cc | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tensorflow/core/kernels/xsmm_conv2d.cc b/tensorflow/core/kernels/xsmm_conv2d.cc index 601704c8a7..ba03357cc6 100644 --- a/tensorflow/core/kernels/xsmm_conv2d.cc +++ b/tensorflow/core/kernels/xsmm_conv2d.cc @@ -27,9 +27,6 @@ void dummy_xsmm_conv2d_ensure_file_is_not_empty(); #include #include -#if 0 -#include -#endif #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/lib/core/blocking_counter.h" @@ -360,7 +357,6 @@ static bool CallLibxsmmConvGeneric(OpKernelContext* ctx, l_tick6 = libxsmm_timer_tick(); #endif -#if 1 BlockingCounter counter(num_threads); for (int i = 0; i < num_threads; ++i) { @@ -371,14 +367,6 @@ static bool CallLibxsmmConvGeneric(OpKernelContext* ctx, }); } counter.Wait(); -#else -#pragma omp parallel - { - chk_libxsmm_err( - libxsmm_dnn_execute_st(libxsmm_handle, kind, 0, omp_get_thread_num()), - "Worker"); - } -#endif #if defined(LIBXSMM_DETAILED_TIMING) l_tick7 = libxsmm_timer_tick(); -- GitLab From 023d47d0f1499f291ff6a6a00de301d0cba6a971 Mon Sep 17 00:00:00 2001 From: fo40225 Date: Fri, 16 Feb 2018 00:18:27 +0800 Subject: [PATCH 217/629] remove unistd.h, use tensorflow::uint32 instead of uint --- tensorflow/core/common_runtime/mkl_cpu_allocator.h | 1 - tensorflow/core/graph/mkl_tfconversion_pass.cc | 2 +- tensorflow/core/kernels/mkl_input_conversion_op.cc | 4 ++-- tensorflow/core/kernels/mkl_tfconv_op.h | 2 +- tensorflow/core/util/mkl_util.h | 8 ++++---- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 0eb47f4e56..b6f74c2bae 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -21,7 +21,6 @@ limitations under the License. #ifdef INTEL_MKL -#include #include #include #include "tensorflow/core/common_runtime/bfc_allocator.h" diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc index 5343e6802d..e9ced4d2b6 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc @@ -222,7 +222,7 @@ Status MklToTfConversionPass::InsertInputConversionNode( BaseType(n->input_type(0))); // Check ordering of edges - for (uint i = 0; i < 4; i++) { + for (uint32 i = 0; i < 4; i++) { CHECK_EQ((edges[i]->dst_input() == i), true); } diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc index 5a8799ae93..e9a2376b54 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc @@ -145,8 +145,8 @@ class MklInputConversionOp : public OpKernel { const MklShape* mkl_shape; const Tensor* tf_tensor; MklShape* tf_mkl_shape; - uint mkl_tensor_index; - uint tf_tensor_index; + uint32 mkl_tensor_index; + uint32 tf_tensor_index; if (input_shape_0.IsMklTensor() && !input_shape_1.IsMklTensor()) { mkl_tensor = &input_tensor_0; mkl_shape = &input_shape_0; diff --git a/tensorflow/core/kernels/mkl_tfconv_op.h b/tensorflow/core/kernels/mkl_tfconv_op.h index 5fafa14b5d..ddea9e281b 100644 --- a/tensorflow/core/kernels/mkl_tfconv_op.h +++ b/tensorflow/core/kernels/mkl_tfconv_op.h @@ -128,7 +128,7 @@ class MklToTfOp : public OpKernel { #else static void ConvertMklToTf(OpKernel* op_kernel, OpKernelContext* context, string data_format_str, DataType op_data_type, - bool has_avx512f, uint input_number) { + bool has_avx512f, uint32 input_number) { // Check that input tensor is in MKL format. const Tensor& input_tensor = MklGetInput(context, input_number); MklShape input_shape; diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index db4c5c35e3..eda966bc33 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1112,9 +1112,9 @@ inline void ForwardMklTensorInToOutWithMklShape(OpKernelContext* context, // Forward the MKL shape ONLY (used in elementwise and other ops where // we call the eigen implementation and MKL shape is not used) inline void ForwardMklMetaDataInToOut(OpKernelContext* context, - uint idx_data_in, uint idx_data_out) { - uint idx_meta_in = GetTensorMetaDataIndex(idx_data_in, context->num_inputs()); - uint idx_meta_out = + uint32 idx_data_in, uint32_t idx_data_out) { + uint32 idx_meta_in = GetTensorMetaDataIndex(idx_data_in, context->num_inputs()); + uint32 idx_meta_out = GetTensorMetaDataIndex(idx_data_out, context->num_outputs()); if (IsRefType(context->input_dtype(idx_data_in))) { @@ -1126,7 +1126,7 @@ inline void ForwardMklMetaDataInToOut(OpKernelContext* context, // Set a dummy MKL shape (called when the output is in TF format) inline void SetDummyMklShapeOutput(OpKernelContext* context, - uint idx_data_out) { + uint32 idx_data_out) { MklShape mkl_shape_output; mkl_shape_output.SetMklTensor(false); AllocateOutputSetMklShape(context, idx_data_out, mkl_shape_output); -- GitLab From 24e343b18c56cf6cc46ed7d58bc6cfb1e5c06688 Mon Sep 17 00:00:00 2001 From: fo40225 Date: Fri, 16 Feb 2018 00:19:29 +0800 Subject: [PATCH 218/629] fix MKL_Complex cast problem error : argument of type "" is incompatible with parameter of type "" --- .../core/kernels/mkl_batch_matmul_op.cc | 24 +++++++--------- tensorflow/core/kernels/mkl_matmul_op.cc | 28 +++++++++---------- tensorflow/core/kernels/mkl_transpose_op.cc | 28 +++++++++++++++---- 3 files changed, 47 insertions(+), 33 deletions(-) diff --git a/tensorflow/core/kernels/mkl_batch_matmul_op.cc b/tensorflow/core/kernels/mkl_batch_matmul_op.cc index d9713075be..c48a2038f9 100644 --- a/tensorflow/core/kernels/mkl_batch_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_batch_matmul_op.cc @@ -29,7 +29,6 @@ limitations under the License. #include #include "mkl_cblas.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -41,9 +40,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" -#define MKL_Complex8 tensorflow::complex64 -#define MKL_Complex16 tensorflow::complex128 - namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -180,16 +176,16 @@ class BatchMatMulMkl : public OpKernel { void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB, const MKL_INT *M_Array, const MKL_INT *N_Array, const MKL_INT *K_Array, - const MKL_Complex8 **A_Array, const MKL_INT *lda_Array, - const MKL_Complex8 **B_Array, const MKL_INT *ldb_Array, - MKL_Complex8 **C_Array, const MKL_INT *ldc_Array, + const complex64 **A_Array, const MKL_INT *lda_Array, + const complex64 **B_Array, const MKL_INT *ldb_Array, + complex64 **C_Array, const MKL_INT *ldc_Array, const MKL_INT group_count, const MKL_INT *group_size) { std::vector TransA_array( group_size[0], TransA ? CblasConjTrans : CblasNoTrans); std::vector TransB_array( group_size[0], TransB ? CblasConjTrans : CblasNoTrans); - std::vector alpha_Array(group_size[0], {1.0f, 0.0f}); - std::vector beta_Array(group_size[0], {0.0f, 0.0f}); + std::vector alpha_Array(group_size[0], {1.0f, 0.0f}); + std::vector beta_Array(group_size[0], {0.0f, 0.0f}); cblas_cgemm_batch( Layout, &TransA_array[0], &TransB_array[0], M_Array, N_Array, K_Array, static_cast(&alpha_Array[0]), @@ -202,18 +198,18 @@ class BatchMatMulMkl : public OpKernel { void MklCblasGemmBatch(const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB, const MKL_INT *M_Array, const MKL_INT *N_Array, const MKL_INT *K_Array, - const MKL_Complex16 **A_Array, + const complex128 **A_Array, const MKL_INT *lda_Array, - const MKL_Complex16 **B_Array, - const MKL_INT *ldb_Array, MKL_Complex16 **C_Array, + const complex128 **B_Array, + const MKL_INT *ldb_Array, complex128 **C_Array, const MKL_INT *ldc_Array, const MKL_INT group_count, const MKL_INT *group_size) { std::vector TransA_array( group_size[0], TransA ? CblasConjTrans : CblasNoTrans); std::vector TransB_array( group_size[0], TransB ? CblasConjTrans : CblasNoTrans); - std::vector alpha_Array(group_size[0], {1.0f, 0.0f}); - std::vector beta_Array(group_size[0], {0.0f, 0.0f}); + std::vector alpha_Array(group_size[0], {1.0f, 0.0f}); + std::vector beta_Array(group_size[0], {0.0f, 0.0f}); cblas_zgemm_batch( Layout, &TransA_array[0], &TransB_array[0], M_Array, N_Array, K_Array, static_cast(&alpha_Array[0]), diff --git a/tensorflow/core/kernels/mkl_matmul_op.cc b/tensorflow/core/kernels/mkl_matmul_op.cc index 47598f443f..25ad8c94a7 100644 --- a/tensorflow/core/kernels/mkl_matmul_op.cc +++ b/tensorflow/core/kernels/mkl_matmul_op.cc @@ -170,32 +170,32 @@ class MklMatMulOp : public OpKernel { // Matrix-Matrix Multiplication with Complex64 (std::complex) tensors. // For detailed info about parameters, look at FP32 function description. void MklBlasGemm(bool transa, bool transb, const int m, const int n, - const int k, const std::complex* a, const int lda, - const std::complex* b, const int ldb, - std::complex* c, int const ldc) { + const int k, const complex64* a, const int lda, + const complex64* b, const int ldb, + complex64* c, int const ldc) { const MKL_Complex8 alpha = {1.0f, 0.0f}; const MKL_Complex8 beta = {0.0f, 0.0f}; cblas_cgemm(CblasRowMajor, transa ? CblasTrans : CblasNoTrans, - transb ? CblasTrans : CblasNoTrans, m, n, k, - static_cast(&alpha), static_cast(a), - lda, static_cast(b), ldb, - static_cast(&beta), static_cast(c), ldc); + transb ? CblasTrans : CblasNoTrans, + m, n, k, &alpha, reinterpret_cast(a), lda, + reinterpret_cast(b), ldb, &beta, + reinterpret_cast(c), ldc); } // Matrix-Matrix Multiplication with Complex128 (std::complex) // tensors. For detailed info about parameters, look at FP32 function // description. void MklBlasGemm(bool transa, bool transb, const int m, const int n, - const int k, const std::complex* a, const int lda, - const std::complex* b, const int ldb, - std::complex* c, const int ldc) { + const int k, const complex128* a, const int lda, + const complex128* b, const int ldb, + complex128* c, const int ldc) { const MKL_Complex16 alpha = {1.0, 0.0}; const MKL_Complex16 beta = {0.0, 0.0}; cblas_zgemm(CblasRowMajor, transa ? CblasTrans : CblasNoTrans, - transb ? CblasTrans : CblasNoTrans, m, n, k, - static_cast(&alpha), static_cast(a), - lda, static_cast(b), ldb, - static_cast(&beta), static_cast(c), ldc); + transb ? CblasTrans : CblasNoTrans, + m, n, k, &alpha, reinterpret_cast(a), lda, + reinterpret_cast(b), ldb, &beta, + reinterpret_cast(c), ldc); } }; diff --git a/tensorflow/core/kernels/mkl_transpose_op.cc b/tensorflow/core/kernels/mkl_transpose_op.cc index 764d4c9400..b44b4d6f54 100644 --- a/tensorflow/core/kernels/mkl_transpose_op.cc +++ b/tensorflow/core/kernels/mkl_transpose_op.cc @@ -18,9 +18,6 @@ limitations under the License. #ifdef INTEL_MKL #define EIGEN_USE_THREADS -#include "tensorflow/core/framework/numeric_types.h" -#define MKL_Complex8 tensorflow::complex64 -#define MKL_Complex16 tensorflow::complex128 #include "mkl_trans.h" #include "tensorflow/core/kernels/transpose_functor.h" #include "tensorflow/core/kernels/transpose_op.h" @@ -62,10 +59,31 @@ Status MKLTranspose2D(const char trans, const Tensor& in, Tensor* out); INSTANTIATE(float, s) INSTANTIATE(double, d) -INSTANTIATE(complex64, c) -INSTANTIATE(complex128, z) + #undef INSTANTIATE +template <> +Status MKLTranspose2D(const char trans, const Tensor& in, Tensor* out) { + const MKL_Complex8 alpha = { 1.0f, 0.0f }; + mkl_comatcopy('R', trans, in.dim_size(0), in.dim_size(1), alpha, + reinterpret_cast(in.flat().data()), + in.dim_size(1), + reinterpret_cast(const_cast(out->flat().data())), + in.dim_size(0)); + return Status::OK(); +} + +template <> +Status MKLTranspose2D(const char trans, const Tensor& in, Tensor* out) { + const MKL_Complex16 alpha = { 1.0, 0.0 }; + mkl_zomatcopy('R', trans, in.dim_size(0), in.dim_size(1), alpha, + reinterpret_cast(in.flat().data()), + in.dim_size(1), + reinterpret_cast(const_cast(out->flat().data())), + in.dim_size(0)); + return Status::OK(); +} + static const char kMKLTranspose = 'T'; static const char kMKLConjugateTranspose = 'C'; -- GitLab From c356d2800182ef7430a70baa2b1b75ea854f9adf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 08:26:14 -0800 Subject: [PATCH 219/629] Fix a bug of overestimating AUC_PR. When TP and FP are both 0s, the precision should be 0 instead of 1. PiperOrigin-RevId: 185842713 --- .../estimator/python/estimator/head_test.py | 14 ++++---- .../python/estimator/multi_head_test.py | 4 +-- .../python/learn/estimators/head_test.py | 4 +-- .../metrics/python/ops/metric_ops_test.py | 22 ++++++------ .../python/estimator/canned/baseline_test.py | 6 ++-- .../estimator/canned/dnn_testing_utils.py | 2 +- .../python/estimator/canned/head_test.py | 10 +++--- .../estimator/canned/linear_testing_utils.py | 2 +- .../python/kernel_tests/metrics_test.py | 35 ++++++++++++++----- tensorflow/python/ops/metrics_impl.py | 6 ++-- 10 files changed, 62 insertions(+), 43 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index 43cdfec968..1411635228 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -446,7 +446,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, - keys.AUC_PR: 0.7639, + keys.AUC_PR: 0.5972, } self._test_eval( head=head, @@ -478,7 +478,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, - keys.AUC_PR: 0.7639, + keys.AUC_PR: 0.5972, } self._test_eval( head=head, @@ -509,7 +509,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, - keys.AUC_PR: 0.7639, + keys.AUC_PR: 0.5972, } self._test_eval( head=head, @@ -543,7 +543,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, - keys.AUC_PR: 0.7639, + keys.AUC_PR: 0.5972, } self._test_eval( head=head, @@ -573,7 +573,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, - keys.AUC_PR: 0.7639, + keys.AUC_PR: 0.5972, keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 2. / 4., keys.PRECISION_AT_THRESHOLD % thresholds[0]: 2. / 3., keys.RECALL_AT_THRESHOLD % thresholds[0]: 2. / 3., @@ -621,7 +621,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.2000, - keys.AUC_PR: 0.7833, + keys.AUC_PR: 0.5833, } # Assert spec contains expected tensors. @@ -1095,7 +1095,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.4977, - keys.AUC_PR: 0.6645, + keys.AUC_PR: 0.4037, } self._test_eval( head=head, diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py index 65ea89ba1b..e47a6788f3 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py @@ -306,8 +306,8 @@ class MultiHeadTest(test.TestCase): # this assert tests that the algorithm remains consistent. keys.AUC + '/head1': 0.1667, keys.AUC + '/head2': 0.3333, - keys.AUC_PR + '/head1': 0.6667, - keys.AUC_PR + '/head2': 0.5000, + keys.AUC_PR + '/head1': 0.49999964, + keys.AUC_PR + '/head2': 0.33333313, } # Assert spec contains expected tensors. diff --git a/tensorflow/contrib/learn/python/learn/estimators/head_test.py b/tensorflow/contrib/learn/python/learn/estimators/head_test.py index 7c2d9bb076..6d5da81b4c 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head_test.py @@ -362,7 +362,7 @@ class MultiLabelHeadTest(test.TestCase): "auc_precision_recall": 0.166667, "auc_precision_recall/class0": 0, "auc_precision_recall/class1": 0., - "auc_precision_recall/class2": 1., + "auc_precision_recall/class2": 0.49999, "labels/actual_label_mean/class0": self._labels[0][0], "labels/actual_label_mean/class1": self._labels[0][1], "labels/actual_label_mean/class2": self._labels[0][2], @@ -748,7 +748,7 @@ class BinaryClassificationHeadTest(test.TestCase): "accuracy/baseline_label_mean": label_mean, "accuracy/threshold_0.500000_mean": 1. / 2, "auc": 1. / 2, - "auc_precision_recall": 0.749999, + "auc_precision_recall": 0.25, "labels/actual_label_mean": label_mean, "labels/prediction_mean": .731059, # softmax "loss": expected_loss, diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index e067f08bab..b4e365d10f 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -1802,9 +1802,9 @@ class StreamingAUCTest(test.TestCase): auc, update_op = metrics.streaming_auc(predictions, labels, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.79166, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.54166603, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.54166603, auc.eval(), delta=1e-3) def testAnotherAUCPRSpecialCase(self): with self.test_session() as sess: @@ -1816,9 +1816,9 @@ class StreamingAUCTest(test.TestCase): auc, update_op = metrics.streaming_auc(predictions, labels, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.610317, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.44365042, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.44365042, auc.eval(), delta=1e-3) def testThirdAUCPRSpecialCase(self): with self.test_session() as sess: @@ -1830,9 +1830,9 @@ class StreamingAUCTest(test.TestCase): auc, update_op = metrics.streaming_auc(predictions, labels, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.90277, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.73611039, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.73611039, auc.eval(), delta=1e-3) def testAllIncorrect(self): inputs = np.random.randint(0, 2, size=(100, 1)) @@ -1865,9 +1865,9 @@ class StreamingAUCTest(test.TestCase): auc, update_op = metrics.streaming_auc(predictions, labels, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(1, sess.run(update_op), 6) + self.assertAlmostEqual(0.49999976, sess.run(update_op), 6) - self.assertAlmostEqual(1, auc.eval(), 6) + self.assertAlmostEqual(0.49999976, auc.eval(), 6) def testWithMultipleUpdates(self): num_samples = 1000 @@ -6689,7 +6689,8 @@ class CohenKappaTest(test.TestCase): # [[0, 25, 0], # [0, 0, 25], # [25, 0, 0]] - # Calculated by v0.19: sklearn.metrics.cohen_kappa_score(labels, predictions) + # Calculated by v0.19: sklearn.metrics.cohen_kappa_score( + # labels, predictions) expect = -0.333333333333 with self.test_session() as sess: @@ -6748,7 +6749,8 @@ class CohenKappaTest(test.TestCase): weights_t: weights[batch_start:batch_end] }) # Calculated by v0.19: sklearn.metrics.cohen_kappa_score( - # labels_np, predictions_np, sample_weight=weights_np) + # labels_np, predictions_np, + # sample_weight=weights_np) expect = 0.289965397924 self.assertAlmostEqual(expect, kappa.eval(), 5) diff --git a/tensorflow/python/estimator/canned/baseline_test.py b/tensorflow/python/estimator/canned/baseline_test.py index 96639e88ea..18c955f5a0 100644 --- a/tensorflow/python/estimator/canned/baseline_test.py +++ b/tensorflow/python/estimator/canned/baseline_test.py @@ -1075,7 +1075,7 @@ class BaselineClassifierEvaluationTest(test.TestCase): metric_keys.MetricKeys.LABEL_MEAN: 1., metric_keys.MetricKeys.ACCURACY_BASELINE: 1, metric_keys.MetricKeys.AUC: 0., - metric_keys.MetricKeys.AUC_PR: 1., + metric_keys.MetricKeys.AUC_PR: 0.5, } else: # Multi classes: loss = 1 * -log ( softmax(logits)[label] ) @@ -1136,7 +1136,7 @@ class BaselineClassifierEvaluationTest(test.TestCase): metric_keys.MetricKeys.LABEL_MEAN: 0.5, metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, metric_keys.MetricKeys.AUC: 0.5, - metric_keys.MetricKeys.AUC_PR: 0.75, + metric_keys.MetricKeys.AUC_PR: 0.25, } else: # Expand logits since batch_size=2 @@ -1212,7 +1212,7 @@ class BaselineClassifierEvaluationTest(test.TestCase): metric_keys.MetricKeys.ACCURACY_BASELINE: ( max(label_mean, 1-label_mean)), metric_keys.MetricKeys.AUC: 0.5, - metric_keys.MetricKeys.AUC_PR: 2. / (1. + 2.), + metric_keys.MetricKeys.AUC_PR: 0.16666645, } else: # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] ) diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py index 706575985f..cbae43e4f7 100644 --- a/tensorflow/python/estimator/canned/dnn_testing_utils.py +++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py @@ -1041,7 +1041,7 @@ class BaseDNNClassifierEvaluateTest(object): # There is no good way to calculate AUC for only two data points. But # that is what the algorithm returns. metric_keys.MetricKeys.AUC: 0.5, - metric_keys.MetricKeys.AUC_PR: 0.75, + metric_keys.MetricKeys.AUC_PR: 0.25, ops.GraphKeys.GLOBAL_STEP: global_step }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1)) diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index 3a03770af4..c09f88262a 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -1558,7 +1558,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): keys.LABEL_MEAN: 2./2, keys.ACCURACY_BASELINE: 2./2, keys.AUC: 0., - keys.AUC_PR: 1., + keys.AUC_PR: 0.74999905, } # Assert spec contains expected tensors. @@ -1636,7 +1636,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): keys.LABEL_MEAN: 2./2, keys.ACCURACY_BASELINE: 2./2, keys.AUC: 0., - keys.AUC_PR: 1., + keys.AUC_PR: 0.75, } # Assert predictions, loss, and metrics. @@ -1741,7 +1741,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): keys.LABEL_MEAN: 2./2, keys.ACCURACY_BASELINE: 2./2, keys.AUC: 0., - keys.AUC_PR: 1., + keys.AUC_PR: 0.74999905, keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 1., keys.PRECISION_AT_THRESHOLD % thresholds[0]: 1., keys.RECALL_AT_THRESHOLD % thresholds[0]: 1., @@ -2188,7 +2188,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): keys.LABEL_MEAN: expected_label_mean, keys.ACCURACY_BASELINE: 1 - expected_label_mean, keys.AUC: .45454565, - keys.AUC_PR: .6737757325172424, + keys.AUC_PR: .21923049, } # Assert spec contains expected tensors. @@ -2487,7 +2487,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): # We cannot reliably calculate AUC with only 4 data points, but the # values should not change because of backwards-compatibility. keys.AUC: 0.5222, - keys.AUC_PR: 0.7341, + keys.AUC_PR: 0.5119, } tol = 1e-2 diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py index 3e9183cf1b..e88fcbbd2e 100644 --- a/tensorflow/python/estimator/canned/linear_testing_utils.py +++ b/tensorflow/python/estimator/canned/linear_testing_utils.py @@ -1342,7 +1342,7 @@ class BaseLinearClassifierEvaluationTest(object): metric_keys.MetricKeys.LABEL_MEAN: 1., metric_keys.MetricKeys.ACCURACY_BASELINE: 1, metric_keys.MetricKeys.AUC: 0., - metric_keys.MetricKeys.AUC_PR: 1., + metric_keys.MetricKeys.AUC_PR: 0.5, } else: # Multi classes: loss = 1 * -log ( soft_max(logits)[label] ) diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py index e0e752147c..fd78c026c2 100644 --- a/tensorflow/python/kernel_tests/metrics_test.py +++ b/tensorflow/python/kernel_tests/metrics_test.py @@ -1105,9 +1105,9 @@ class AUCTest(test.TestCase): auc, update_op = metrics.auc(labels, predictions, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.79166, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.54166, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.54166, auc.eval(), delta=1e-3) def testAnotherAUCPRSpecialCase(self): with self.test_session() as sess: @@ -1119,9 +1119,9 @@ class AUCTest(test.TestCase): auc, update_op = metrics.auc(labels, predictions, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.610317, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.44365042, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.44365042, auc.eval(), delta=1e-3) def testThirdAUCPRSpecialCase(self): with self.test_session() as sess: @@ -1133,9 +1133,26 @@ class AUCTest(test.TestCase): auc, update_op = metrics.auc(labels, predictions, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.90277, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.73611039, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.73611039, auc.eval(), delta=1e-3) + + def testFourthAUCPRSpecialCase(self): + # Create the labels and data. + labels = np.array([ + 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]) + predictions = np.array([ + 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35]) + + with self.test_session() as sess: + auc, _ = metrics.auc( + labels, predictions, curve='PR', num_thresholds=11) + + sess.run(variables.local_variables_initializer()) + # Since this is only approximate, we can't expect a 6 digits match. + # Although with higher number of samples/thresholds we should see the + # accuracy improving + self.assertAlmostEqual(0.0, auc.eval(), delta=0.001) def testAllIncorrect(self): inputs = np.random.randint(0, 2, size=(100, 1)) @@ -1161,16 +1178,16 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(1, auc.eval(), 6) - def testRecallOneAndPrecisionOneGivesOnePRAUC(self): + def testRecallOneAndPrecisionOne(self): with self.test_session() as sess: predictions = array_ops.ones([4], dtype=dtypes_lib.float32) labels = array_ops.ones([4]) auc, update_op = metrics.auc(labels, predictions, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(1, sess.run(update_op), 6) + self.assertAlmostEqual(0.5, sess.run(update_op), 6) - self.assertAlmostEqual(1, auc.eval(), 6) + self.assertAlmostEqual(0.5, auc.eval(), 6) def np_auc(self, predictions, labels, weights): """Computes the AUC explicitly using Numpy. diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 7776ff08c4..44c2f304cf 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -672,7 +672,7 @@ def auc(labels, x = fp_rate y = rec else: # curve == 'PR'. - prec = math_ops.div(tp + epsilon, tp + fp + epsilon) + prec = math_ops.div(tp, tp + fp + epsilon) x = rec y = prec if summation_method == 'trapezoidal': @@ -923,8 +923,8 @@ def mean_per_class_accuracy(labels, weights = array_ops.reshape(weights, [-1]) weights = math_ops.to_float(weights) - is_correct = is_correct * weights - ones = ones * weights + is_correct *= weights + ones *= weights update_total_op = state_ops.scatter_add(total, labels, ones) update_count_op = state_ops.scatter_add(count, labels, is_correct) -- GitLab From 62b3b9a0d40aacb2c890ed56b831dd2568304f89 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 15 Feb 2018 13:31:49 -0500 Subject: [PATCH 220/629] Docs fix r1.6 (#17038) * add missing blank line PiperOrigin-RevId: 185554969 * fix cuDNN64 dll name --- tensorflow/docs_src/get_started/get_started_for_beginners.md | 4 ++++ tensorflow/docs_src/get_started/premade_estimators.md | 1 + tensorflow/docs_src/install/install_windows.md | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/docs_src/get_started/get_started_for_beginners.md b/tensorflow/docs_src/get_started/get_started_for_beginners.md index ea1c2fb3f4..446bae4b89 100644 --- a/tensorflow/docs_src/get_started/get_started_for_beginners.md +++ b/tensorflow/docs_src/get_started/get_started_for_beginners.md @@ -36,6 +36,7 @@ the following three: alt="Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor" src="../images/iris_three_species.jpg"> + **From left to right, [*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by [Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0), @@ -188,6 +189,7 @@ provides a programming stack consisting of multiple API layers:
    + **The TensorFlow Programming Environment.**

     

    @@ -380,6 +382,7 @@ fully connected neural network consisting of three hidden layers:
    + **A neural network with three hidden layers.**

     

    @@ -568,6 +571,7 @@ of 0.5. The following suggests a more effective model: 5.5 2.5 4.0 1.3 1 1 + **A model that is 80% accurate.**

     

    diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/get_started/premade_estimators.md index 4f01f997c3..6bffd2e065 100644 --- a/tensorflow/docs_src/get_started/premade_estimators.md +++ b/tensorflow/docs_src/get_started/premade_estimators.md @@ -98,6 +98,7 @@ classifies Iris flowers into three different species based on the size of their alt="Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor" src="../images/iris_three_species.jpg"> + **From left to right, [*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by [Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0), diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index 86a111c2ec..87e1a715aa 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -47,7 +47,7 @@ installed on your system: If you have a different version of one of the preceding packages, please change to the specified versions. In particular, the cuDNN version -must match exactly: TensorFlow will not load if it cannot find `cuDNN64_6.dll`. +must match exactly: TensorFlow will not load if it cannot find `cuDNN64_7.dll`. To use a different version of cuDNN, you must build from source. ## Determine how to install TensorFlow -- GitLab From b91155edb661e074b716d7051c2cb71cbf9ec759 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Thu, 15 Feb 2018 10:39:04 -0800 Subject: [PATCH 221/629] Enable half precision convolution for the CPU and GPU backends. Enhance the CPU IR emitter to support F16 dot operation and convolution operation. Add a CPU runtime implementation for F16 convolution. Enhance the GPU backend to handle F16 convolution thunk. Convert some F32 xla convolution tests to support both F32 and F16 and disable the tests for the CPU backend due to b/72509305. PiperOrigin-RevId: 185862438 --- tensorflow/compiler/xla/array.h | 65 +- tensorflow/compiler/xla/array2d.h | 8 + tensorflow/compiler/xla/array2d_test.cc | 14 + tensorflow/compiler/xla/array3d.h | 10 + tensorflow/compiler/xla/array3d_test.cc | 23 + tensorflow/compiler/xla/array4d.h | 10 + tensorflow/compiler/xla/array4d_test.cc | 30 + tensorflow/compiler/xla/array_test.cc | 19 + .../compiler/xla/service/cpu/cpu_runtime.cc | 4 + .../compiler/xla/service/cpu/cpu_runtime.h | 2 + .../xla/service/cpu/dot_op_emitter.cc | 2 +- .../compiler/xla/service/cpu/ir_emitter.cc | 37 +- .../xla/service/cpu/runtime_conv2d.cc | 21 +- .../compiler/xla/service/cpu/runtime_conv2d.h | 14 + .../xla/service/cpu/runtime_conv2d_impl.h | 31 +- .../cpu/runtime_single_threaded_conv2d.cc | 20 +- .../cpu/runtime_single_threaded_conv2d.h | 14 + .../xla/service/cpu/simple_orc_jit.cc | 2 + .../xla/service/gpu/convolution_thunk.cc | 16 +- .../gpu/cudnn_convolution_algorithm_picker.cc | 24 +- .../service/gpu/cudnn_convolution_runner.cc | 104 ++- .../service/gpu/cudnn_convolution_runner.h | 19 +- .../compiler/xla/service/hlo_evaluator.cc | 9 +- .../compiler/xla/tests/convolution_test.cc | 720 +++++++++++------- tensorflow/compiler/xla/tests/test_macros.h | 27 + 25 files changed, 845 insertions(+), 400 deletions(-) diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index 71aa057cd3..46ee4e64c9 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -121,6 +121,23 @@ class Array { CHECK(idx == num_elements()); } + // Creates a 2D array of Eigen::half from the given nested initializer list of + // float values. + template ::value && + std::is_same::value>::type> + Array(std::initializer_list> values) + : Array(ToInt64Vector({values.size(), values.begin()->size()})) { + int64 idx = 0; + for (const auto& it1 : values) { + for (const auto& it2 : it1) { + values_[idx] = static_cast(it2); + ++idx; + } + } + CHECK(idx == num_elements()); + } + // Creates a 3D array from the given nested initializer list. The outer // initializer list is the first dimension, and so on. Array(InitializerList3D values) @@ -138,6 +155,27 @@ class Array { CHECK(idx == num_elements()); } + // Creates a 3D array of Eigen::half from the given nested initializer list of + // float values. + template ::value && + std::is_same::value>::type> + Array(std::initializer_list>> + values) + : Array(ToInt64Vector({values.size(), values.begin()->size(), + values.begin()->begin()->size()})) { + int64 idx = 0; + for (const auto& it1 : values) { + for (const auto& it2 : it1) { + for (const auto& it3 : it2) { + values_[idx] = static_cast(it3); + ++idx; + } + } + } + CHECK(idx == num_elements()); + } + // Creates a 4D array from the given nested initializer list. The outer // initializer list is the first dimension, and so on. Array(InitializerList4D values) @@ -158,6 +196,31 @@ class Array { CHECK(idx == num_elements()); } + // Creates a 4D array of Eigen::half from the given nested initializer list of + // float values. + template ::value && + std::is_same::value>::type> + Array(std::initializer_list< + std::initializer_list>>> + values) + : Array(ToInt64Vector({values.size(), values.begin()->size(), + values.begin()->begin()->size(), + values.begin()->begin()->begin()->size()})) { + int64 idx = 0; + for (const auto& it1 : values) { + for (const auto& it2 : it1) { + for (const auto& it3 : it2) { + for (const auto& it4 : it3) { + values_[idx] = static_cast(it4); + ++idx; + } + } + } + } + CHECK(idx == num_elements()); + } + Array(const Array& other) : sizes_(other.sizes_), values_(new T[num_elements()]) { std::copy(&other.values_[0], &other.values_[0] + num_elements(), @@ -185,7 +248,7 @@ class Array { // Fills the array with the sequence i*multiplier for i=0,1,... void FillWithMultiples(const T& multiplier) { for (int64 i = 0; i < num_elements(); ++i) { - values_[i] = i * multiplier; + values_[i] = static_cast(i) * multiplier; } } diff --git a/tensorflow/compiler/xla/array2d.h b/tensorflow/compiler/xla/array2d.h index bb85fbee9b..41f563486d 100644 --- a/tensorflow/compiler/xla/array2d.h +++ b/tensorflow/compiler/xla/array2d.h @@ -52,6 +52,14 @@ class Array2D : public Array { Array2D(std::initializer_list> values) : Array(values) {} + // Creates an array of Eigen::half from the given nested initializer list of + // float values. + template ::value && + std::is_same::value>::type> + Array2D(std::initializer_list> values) + : Array(values) {} + Array2D(const Array2D& other) : Array(other) {} int64 n1() const { return this->dim(0); } diff --git a/tensorflow/compiler/xla/array2d_test.cc b/tensorflow/compiler/xla/array2d_test.cc index c08e42c20e..93034a719b 100644 --- a/tensorflow/compiler/xla/array2d_test.cc +++ b/tensorflow/compiler/xla/array2d_test.cc @@ -63,6 +63,20 @@ TEST(Array2dTest, InitializerListCtor) { EXPECT_EQ(arr(1, 2), 6); } +TEST(Array2dTest, InitializerListCtorHalf) { + Array2D arr = {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}; + + EXPECT_EQ(arr.n1(), 2); + EXPECT_EQ(arr.n2(), 3); + + EXPECT_EQ(arr(0, 0), static_cast(1)); + EXPECT_EQ(arr(0, 1), static_cast(2)); + EXPECT_EQ(arr(0, 2), static_cast(3)); + EXPECT_EQ(arr(1, 0), static_cast(4)); + EXPECT_EQ(arr(1, 1), static_cast(5)); + EXPECT_EQ(arr(1, 2), static_cast(6)); +} + TEST(Array2dTest, Accessors) { Array2D arr = {{1, 2, 3}, {4, 5, 6}}; diff --git a/tensorflow/compiler/xla/array3d.h b/tensorflow/compiler/xla/array3d.h index a1c5840a5f..e5eb235d45 100644 --- a/tensorflow/compiler/xla/array3d.h +++ b/tensorflow/compiler/xla/array3d.h @@ -57,6 +57,16 @@ class Array3D : public Array { values) : Array(values) {} + // Creates an array of Eigen::half from the given nested initializer list of + // float values. + template ::value && + std::is_same::value>::type> + Array3D( + std::initializer_list>> + values) + : Array(values) {} + int64 n1() const { return this->dim(0); } int64 n2() const { return this->dim(1); } int64 n3() const { return this->dim(2); } diff --git a/tensorflow/compiler/xla/array3d_test.cc b/tensorflow/compiler/xla/array3d_test.cc index 6b5f4b343b..691ff6c035 100644 --- a/tensorflow/compiler/xla/array3d_test.cc +++ b/tensorflow/compiler/xla/array3d_test.cc @@ -69,6 +69,29 @@ TEST(Array3dTest, InitializerListCtor) { EXPECT_EQ(arr(2, 3, 1), 24); } +TEST(Array3dTest, InitializerListCtorHalf) { + Array3D arr = { + {{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}, {7.0f, 8.0f}}, + {{9.0f, 10.0f}, {11.0f, 12.0f}, {13.0f, 14.0f}, {15.0f, 16.0f}}, + {{17.0f, 18.0f}, {19.0f, 20.0f}, {21.0f, 22.0f}, {23.0f, 24.0f}}}; + + EXPECT_EQ(arr.n1(), 3); + EXPECT_EQ(arr.n2(), 4); + EXPECT_EQ(arr.n3(), 2); + EXPECT_EQ(arr.num_elements(), 24); + + EXPECT_EQ(arr(0, 0, 0), static_cast(1)); + EXPECT_EQ(arr(0, 0, 1), static_cast(2)); + EXPECT_EQ(arr(0, 1, 0), static_cast(3)); + EXPECT_EQ(arr(0, 3, 1), static_cast(8)); + EXPECT_EQ(arr(1, 0, 0), static_cast(9)); + EXPECT_EQ(arr(1, 1, 1), static_cast(12)); + EXPECT_EQ(arr(2, 0, 0), static_cast(17)); + EXPECT_EQ(arr(2, 1, 1), static_cast(20)); + EXPECT_EQ(arr(2, 2, 0), static_cast(21)); + EXPECT_EQ(arr(2, 3, 1), static_cast(24)); +} + TEST(Array3dTest, Fill) { Array3D fullof7(2, 3, 4, 7); for (int64 n1 = 0; n1 < fullof7.n1(); ++n1) { diff --git a/tensorflow/compiler/xla/array4d.h b/tensorflow/compiler/xla/array4d.h index f8b2b2afe5..cff70e54ba 100644 --- a/tensorflow/compiler/xla/array4d.h +++ b/tensorflow/compiler/xla/array4d.h @@ -82,6 +82,16 @@ class Array4D : public Array { values) : Array(values) {} + // Creates an array of Eigen::half from the given nested initializer list of + // float values. + template ::value && + std::is_same::value>::type> + Array4D(std::initializer_list>>> + values) + : Array(values) {} + // Numerically-named aliases for the various dimensions. This matches the // dimension names used in array3d. int64 n4() const { return this->dim(3); } diff --git a/tensorflow/compiler/xla/array4d_test.cc b/tensorflow/compiler/xla/array4d_test.cc index 3bc8148c91..927733ea1e 100644 --- a/tensorflow/compiler/xla/array4d_test.cc +++ b/tensorflow/compiler/xla/array4d_test.cc @@ -97,6 +97,36 @@ TEST(Array3dTest, InitializerListCtor) { EXPECT_EQ(arr(2, 3, 1, 0), 24); } +TEST(Array3dTest, InitializerListCtorHalf) { + Array4D arr = { + {{{1.0f}, {2.0f}}, {{3.0f}, {4.0f}}, {{5.0f}, {6.0f}}, {{7.0f}, {8.0f}}}, + {{{9.0f}, {10.0f}}, + {{11.0f}, {12.0f}}, + {{13.0f}, {14.0f}}, + {{15.0f}, {16.0f}}}, + {{{17.0f}, {18.0f}}, + {{19.0f}, {20.0f}}, + {{21.0f}, {22.0f}}, + {{23.0f}, {24.0f}}}}; + + EXPECT_EQ(arr.n1(), 3); + EXPECT_EQ(arr.n2(), 4); + EXPECT_EQ(arr.n3(), 2); + EXPECT_EQ(arr.n4(), 1); + EXPECT_EQ(arr.num_elements(), 24); + + EXPECT_EQ(arr(0, 0, 0, 0), static_cast(1)); + EXPECT_EQ(arr(0, 0, 1, 0), static_cast(2)); + EXPECT_EQ(arr(0, 1, 0, 0), static_cast(3)); + EXPECT_EQ(arr(0, 3, 1, 0), static_cast(8)); + EXPECT_EQ(arr(1, 0, 0, 0), static_cast(9)); + EXPECT_EQ(arr(1, 1, 1, 0), static_cast(12)); + EXPECT_EQ(arr(2, 0, 0, 0), static_cast(17)); + EXPECT_EQ(arr(2, 1, 1, 0), static_cast(20)); + EXPECT_EQ(arr(2, 2, 0, 0), static_cast(21)); + EXPECT_EQ(arr(2, 3, 1, 0), static_cast(24)); +} + TEST(Array4dTest, Fill) { Array4D fullof7(2, 3, 4, 5, 7); fullof7.Each([](tensorflow::gtl::ArraySlice idx, int* cell) { diff --git a/tensorflow/compiler/xla/array_test.cc b/tensorflow/compiler/xla/array_test.cc index 8b94194774..e8356c9832 100644 --- a/tensorflow/compiler/xla/array_test.cc +++ b/tensorflow/compiler/xla/array_test.cc @@ -60,6 +60,25 @@ TEST(ArrayTest, InitializerListCtor) { EXPECT_EQ(arr(1, 2), 6); } +TEST(ArrayTest, InitializerListCtorHalf) { + Array d2({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}); + EXPECT_EQ(d2.dim(0), 2); + EXPECT_EQ(d2.dim(1), 3); + + Array d3({{{1.0f}, {4.0f}}, {{1.0f}, {4.0f}}, {{1.0f}, {4.0f}}}); + EXPECT_EQ(d3.dim(0), 3); + EXPECT_EQ(d3.dim(1), 2); + EXPECT_EQ(d3.dim(2), 1); + + Array d4( + {{{{1.0f}, {4.0f}}, {{1.0f}, {4.0f}}, {{1.0f}, {4.0f}}}, + {{{1.0f}, {4.0f}}, {{1.0f}, {4.0f}}, {{1.0f}, {4.0f}}}}); + EXPECT_EQ(d4.dim(0), 2); + EXPECT_EQ(d4.dim(1), 3); + EXPECT_EQ(d4.dim(2), 2); + EXPECT_EQ(d4.dim(3), 1); +} + TEST(ArrayTest, IndexingReadWrite) { Array arr({2, 3}); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc index 1ef45dbec3..40ace96327 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc @@ -35,6 +35,8 @@ extern const char* const kEigenMatMulF32SymbolName = "__xla_cpu_runtime_EigenMatMulF32"; extern const char* const kEigenMatMulF64SymbolName = "__xla_cpu_runtime_EigenMatMulF64"; +extern const char* const kEigenConvF16SymbolName = + "__xla_cpu_runtime_EigenConvF16"; extern const char* const kEigenConvF32SymbolName = "__xla_cpu_runtime_EigenConvF32"; extern const char* const kEigenFftSymbolName = "__xla_cpu_runtime_EigenFft"; @@ -42,6 +44,8 @@ extern const char* const kEigenSingleThreadedMatMulF32SymbolName = "__xla_cpu_runtime_EigenSingleThreadedMatMulF32"; extern const char* const kEigenSingleThreadedMatMulF64SymbolName = "__xla_cpu_runtime_EigenSingleThreadedMatMulF64"; +extern const char* const kEigenSingleThreadedConvF16SymbolName = + "__xla_cpu_runtime_EigenSingleThreadedConvF16"; extern const char* const kEigenSingleThreadedConvF32SymbolName = "__xla_cpu_runtime_EigenSingleThreadedConvF32"; extern const char* const kAcquireInfeedBufferForDequeueSymbolName = diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h index 3e1f080711..2141dfe1ce 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h @@ -43,10 +43,12 @@ namespace runtime { // because it is a symbol in the cpu_runtime library. extern const char* const kEigenMatMulF32SymbolName; extern const char* const kEigenMatMulF64SymbolName; +extern const char* const kEigenConvF16SymbolName; extern const char* const kEigenConvF32SymbolName; extern const char* const kEigenFftSymbolName; extern const char* const kEigenSingleThreadedMatMulF32SymbolName; extern const char* const kEigenSingleThreadedMatMulF64SymbolName; +extern const char* const kEigenSingleThreadedConvF16SymbolName; extern const char* const kEigenSingleThreadedConvF32SymbolName; extern const char* const kAcquireInfeedBufferForDequeueSymbolName; extern const char* const kReleaseInfeedBufferAfterDequeueSymbolName; diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index c9fc586b9a..cfe7c9c3af 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -549,7 +549,7 @@ DotOpEmitter::DotOpEmitter( const HloModuleConfig& hlo_module_config, const TargetMachineFeatures& target_machine_features) { PrimitiveType type = target_array.GetShape().element_type(); - TF_RET_CHECK(F32 == type || F64 == type || C64 == type); + TF_RET_CHECK(F16 == type || F32 == type || F64 == type || C64 == type); DotOpEmitter dot_emitter(dot, transpose_lhs, transpose_rhs, target_array, lhs_array, rhs_array, addend_array, executable_run_options_value, ir_builder, diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 0b2d3d4746..496aea051c 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -801,7 +801,7 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { auto rhs = dot->operand(1); TF_RETURN_IF_ERROR(ElementTypesSameAndSupported( /*instruction=*/*dot, /*operands=*/{lhs, rhs}, - /*supported_types=*/{F32, F64, C64})); + /*supported_types=*/{F16, F32, F64, C64})); const DotDimensionNumbers& dnums = dot->dot_dimension_numbers(); if (dnums.lhs_batch_dimensions_size() > 0 || dnums.rhs_batch_dimensions_size() > 0) { @@ -849,7 +849,7 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { const auto& window = convolution->window(); TF_RETURN_IF_ERROR(ElementTypesSameAndSupported( /*instruction=*/*convolution, /*operands=*/{lhs, rhs}, - /*supported_types=*/{F32, C64})); + /*supported_types=*/{F16, F32, C64})); const ConvolutionDimensionNumbers& dnums = convolution->convolution_dimension_numbers(); @@ -928,25 +928,30 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { int64 rhs_col_dilation = one_dim_convolution ? 1 : window.dimensions(1).window_dilation(); - // Args have been computed, make the call. - llvm::Type* float_ptr_type = ir_builder_.getFloatTy()->getPointerTo(); + PrimitiveType primitive_type = lhs->shape().element_type(); + llvm::Type* ir_ptr_type = primitive_type == F16 + ? ir_builder_.getHalfTy()->getPointerTo() + : ir_builder_.getFloatTy()->getPointerTo(); llvm::Type* int64_type = ir_builder_.getInt64Ty(); llvm::Type* int8_ptr_type = ir_builder_.getInt8Ty()->getPointerTo(); llvm::FunctionType* conv_type = llvm::FunctionType::get( ir_builder_.getVoidTy(), - {int8_ptr_type, float_ptr_type, float_ptr_type, float_ptr_type, - int64_type, int64_type, int64_type, int64_type, - int64_type, int64_type, int64_type, int64_type, - int64_type, int64_type, int64_type, int64_type, - int64_type, int64_type, int64_type, int64_type, - int64_type, int64_type, int64_type, int64_type}, + {int8_ptr_type, ir_ptr_type, ir_ptr_type, ir_ptr_type, int64_type, + int64_type, int64_type, int64_type, int64_type, int64_type, + int64_type, int64_type, int64_type, int64_type, int64_type, + int64_type, int64_type, int64_type, int64_type, int64_type, + int64_type, int64_type, int64_type, int64_type}, /*isVarArg=*/false); bool multi_threaded_eigen = hlo_module_config_.debug_options().xla_cpu_multi_thread_eigen(); const char* fn_name = - (multi_threaded_eigen - ? runtime::kEigenConvF32SymbolName - : runtime::kEigenSingleThreadedConvF32SymbolName); + primitive_type == F16 + ? (multi_threaded_eigen + ? runtime::kEigenConvF16SymbolName + : runtime::kEigenSingleThreadedConvF16SymbolName) + : (multi_threaded_eigen + ? runtime::kEigenConvF32SymbolName + : runtime::kEigenSingleThreadedConvF32SymbolName); llvm::Function* conv_func = llvm::cast( module_->getOrInsertFunction(fn_name, conv_type)); conv_func->setCallingConv(llvm::CallingConv::C); @@ -956,9 +961,9 @@ Status IrEmitter::HandleConvolution(HloInstruction* convolution) { conv_func, { GetExecutableRunOptionsArgument(), ir_builder_.CreateBitCast( - GetEmittedValueFor(convolution), float_ptr_type), - ir_builder_.CreateBitCast(lhs_address, float_ptr_type), - ir_builder_.CreateBitCast(rhs_address, float_ptr_type), + GetEmittedValueFor(convolution), ir_ptr_type), + ir_builder_.CreateBitCast(lhs_address, ir_ptr_type), + ir_builder_.CreateBitCast(rhs_address, ir_ptr_type), ir_builder_.getInt64(input_batch), ir_builder_.getInt64(input_rows), ir_builder_.getInt64(input_cols), diff --git a/tensorflow/compiler/xla/service/cpu/runtime_conv2d.cc b/tensorflow/compiler/xla/service/cpu/runtime_conv2d.cc index c2f64eb27a..3905e7ff2a 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_conv2d.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_conv2d.cc @@ -34,7 +34,26 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenConvF32( int64 lhs_col_dilation, int64 rhs_row_dilation, int64 rhs_col_dilation) { const xla::ExecutableRunOptions* run_options = static_cast(run_options_ptr); - tensorflow::xla::EigenConvF32Impl( + tensorflow::xla::EigenConvImpl( + *run_options->intra_op_thread_pool(), out, lhs, rhs, input_batch, + input_rows, input_cols, input_channels, kernel_rows, kernel_cols, + kernel_channels, kernel_filters, output_rows, output_cols, row_stride, + col_stride, padding_top, padding_bottom, padding_left, padding_right, + lhs_row_dilation, lhs_col_dilation, rhs_row_dilation, rhs_col_dilation); +} + +TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenConvF16( + const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs, + Eigen::half* rhs, int64 input_batch, int64 input_rows, int64 input_cols, + int64 input_channels, int64 kernel_rows, int64 kernel_cols, + int64 kernel_channels, int64 kernel_filters, int64 output_rows, + int64 output_cols, int64 row_stride, int64 col_stride, int64 padding_top, + int64 padding_bottom, int64 padding_left, int64 padding_right, + int64 lhs_row_dilation, int64 lhs_col_dilation, int64 rhs_row_dilation, + int64 rhs_col_dilation) { + const xla::ExecutableRunOptions* run_options = + static_cast(run_options_ptr); + tensorflow::xla::EigenConvImpl( *run_options->intra_op_thread_pool(), out, lhs, rhs, input_batch, input_rows, input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels, kernel_filters, output_rows, output_cols, row_stride, diff --git a/tensorflow/compiler/xla/service/cpu/runtime_conv2d.h b/tensorflow/compiler/xla/service/cpu/runtime_conv2d.h index 05ae094691..39e20ed456 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_conv2d.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_conv2d.h @@ -34,6 +34,20 @@ extern void __xla_cpu_runtime_EigenConvF32( tensorflow::int64 lhs_col_dilation, tensorflow::int64 rhs_row_dilation, tensorflow::int64 rhs_col_dilation); +extern void __xla_cpu_runtime_EigenConvF16( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, + Eigen::half* out, Eigen::half* lhs, Eigen::half* rhs, + tensorflow::int64 input_batch, tensorflow::int64 input_rows, + tensorflow::int64 input_cols, tensorflow::int64 input_channels, + tensorflow::int64 kernel_rows, tensorflow::int64 kernel_cols, + tensorflow::int64 kernel_channels, tensorflow::int64 kernel_filters, + tensorflow::int64 output_rows, tensorflow::int64 output_cols, + tensorflow::int64 row_stride, tensorflow::int64 col_stride, + tensorflow::int64 padding_top, tensorflow::int64 padding_bottom, + tensorflow::int64 padding_left, tensorflow::int64 padding_right, + tensorflow::int64 lhs_row_dilation, tensorflow::int64 lhs_col_dilation, + tensorflow::int64 rhs_row_dilation, tensorflow::int64 rhs_col_dilation); + } // extern "C" #endif // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_CONV2D_H_ diff --git a/tensorflow/compiler/xla/service/cpu/runtime_conv2d_impl.h b/tensorflow/compiler/xla/service/cpu/runtime_conv2d_impl.h index 02f45fee0f..85af63bb03 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_conv2d_impl.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_conv2d_impl.h @@ -24,26 +24,27 @@ limitations under the License. namespace tensorflow { namespace xla { -template -void EigenConvF32Impl(const EigenDevice& device, float* out, float* lhs, - float* rhs, int64 input_batch, int64 input_rows, - int64 input_cols, int64 input_channels, int64 kernel_rows, - int64 kernel_cols, int64 kernel_channels, - int64 kernel_filters, int64 output_rows, - int64 output_cols, int64 row_stride, int64 col_stride, - int64 padding_top, int64 padding_bottom, - int64 padding_left, int64 padding_right, - int64 lhs_row_dilation, int64 lhs_col_dilation, - int64 rhs_row_dilation, int64 rhs_col_dilation) { - const Eigen::TensorMap, +template +void EigenConvImpl(const EigenDevice& device, ScalarType* out, ScalarType* lhs, + ScalarType* rhs, int64 input_batch, int64 input_rows, + int64 input_cols, int64 input_channels, int64 kernel_rows, + int64 kernel_cols, int64 kernel_channels, + int64 kernel_filters, int64 output_rows, int64 output_cols, + int64 row_stride, int64 col_stride, int64 padding_top, + int64 padding_bottom, int64 padding_left, + int64 padding_right, int64 lhs_row_dilation, + int64 lhs_col_dilation, int64 rhs_row_dilation, + int64 rhs_col_dilation) { + const Eigen::TensorMap, Eigen::Aligned> input(lhs, input_batch, input_rows, input_cols, input_channels); - const Eigen::TensorMap, + const Eigen::TensorMap, Eigen::Aligned> kernel(rhs, kernel_rows, kernel_cols, kernel_channels, kernel_filters); - Eigen::TensorMap, Eigen::Aligned> + Eigen::TensorMap, + Eigen::Aligned> output(out, input_batch, output_rows, output_cols, kernel_filters); Eigen::array, 1> contract_dims; @@ -75,7 +76,7 @@ void EigenConvF32Impl(const EigenDevice& device, float* out, float* lhs, row_stride, rhs_col_dilation, rhs_row_dilation, lhs_col_dilation, lhs_row_dilation, padding_left, padding_right, padding_top, - padding_bottom, 0.0f) + padding_bottom, static_cast(0.0f)) .reshape(pre_contract_dims) .contract(kernel.reshape(kernel_dims), contract_dims) .reshape(post_contract_dims); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc index d0b0e11ac0..5afccc6a86 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc @@ -21,6 +21,24 @@ limitations under the License. using tensorflow::int64; +TF_ATTRIBUTE_NO_SANITIZE_MEMORY void +__xla_cpu_runtime_EigenSingleThreadedConvF16( + const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs, + Eigen::half* rhs, int64 input_batch, int64 input_rows, int64 input_cols, + int64 input_channels, int64 kernel_rows, int64 kernel_cols, + int64 kernel_channels, int64 kernel_filters, int64 output_rows, + int64 output_cols, int64 row_stride, int64 col_stride, int64 padding_top, + int64 padding_bottom, int64 padding_left, int64 padding_right, + int64 lhs_row_dilation, int64 lhs_col_dilation, int64 rhs_row_dilation, + int64 rhs_col_dilation) { + tensorflow::xla::EigenConvImpl( + Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows, + input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels, + kernel_filters, output_rows, output_cols, row_stride, col_stride, + padding_top, padding_bottom, padding_left, padding_right, + lhs_row_dilation, lhs_col_dilation, rhs_row_dilation, rhs_col_dilation); +} + TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_EigenSingleThreadedConvF32( const void* run_options_ptr, float* out, float* lhs, float* rhs, @@ -30,7 +48,7 @@ __xla_cpu_runtime_EigenSingleThreadedConvF32( int64 row_stride, int64 col_stride, int64 padding_top, int64 padding_bottom, int64 padding_left, int64 padding_right, int64 lhs_row_dilation, int64 lhs_col_dilation, int64 rhs_row_dilation, int64 rhs_col_dilation) { - tensorflow::xla::EigenConvF32Impl( + tensorflow::xla::EigenConvImpl( Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows, input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels, kernel_filters, output_rows, output_cols, row_stride, col_stride, diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h index 8ae1a42149..f216bd0152 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h @@ -20,6 +20,20 @@ limitations under the License. extern "C" { +extern void __xla_cpu_runtime_EigenSingleThreadedConvF16( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, + Eigen::half* out, Eigen::half* lhs, Eigen::half* rhs, + tensorflow::int64 input_batch, tensorflow::int64 input_rows, + tensorflow::int64 input_cols, tensorflow::int64 input_channels, + tensorflow::int64 kernel_rows, tensorflow::int64 kernel_cols, + tensorflow::int64 kernel_channels, tensorflow::int64 kernel_filters, + tensorflow::int64 output_rows, tensorflow::int64 output_cols, + tensorflow::int64 row_stride, tensorflow::int64 col_stride, + tensorflow::int64 padding_top, tensorflow::int64 padding_bottom, + tensorflow::int64 padding_left, tensorflow::int64 padding_right, + tensorflow::int64 lhs_row_dilation, tensorflow::int64 lhs_col_dilation, + tensorflow::int64 rhs_row_dilation, tensorflow::int64 rhs_col_dilation); + extern void __xla_cpu_runtime_EigenSingleThreadedConvF32( const void* /* xla::ExecutableRunOptions* */ run_options_ptr, float* out, float* lhs, float* rhs, tensorflow::int64 input_batch, diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index 64d3a51f41..f19cb86cc4 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -208,10 +208,12 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(AcquireInfeedBufferForDequeue); REGISTER_CPU_RUNTIME_SYMBOL(AcquireOutfeedBufferForPopulation); + REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenFft); REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64); + REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index 15bba49b73..461747b699 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -63,12 +63,12 @@ ConvolutionThunk::ConvolutionThunk( Status ConvolutionThunk::ExecuteOnStream( const BufferAllocations& buffer_allocations, se::Stream* stream) { - se::DeviceMemory input_data( - buffer_allocations.GetDeviceAddress(input_buffer_)); - se::DeviceMemory filter_data( - buffer_allocations.GetDeviceAddress(filter_buffer_)); - se::DeviceMemory output_data( - buffer_allocations.GetDeviceAddress(output_buffer_)); + se::DeviceMemoryBase input_data = + buffer_allocations.GetDeviceAddress(input_buffer_); + se::DeviceMemoryBase filter_data = + buffer_allocations.GetDeviceAddress(filter_buffer_); + se::DeviceMemoryBase output_data = + buffer_allocations.GetDeviceAddress(output_buffer_); se::DeviceMemoryBase scratch = buffer_allocations.GetDeviceAddress(scratch_buffer_); @@ -80,8 +80,8 @@ Status ConvolutionThunk::ExecuteOnStream( filter_data, output_data, scratch, window_, dim_nums_, algorithm_config, stream)); - // Figure out which of output/input/filter is the result produced by this op, - // and write the result tuple. + // Figure out which of output/input/filter is the result produced by + // this op, and write the result tuple. void* result_ptr = [&] { switch (convolution_kind_) { case CudnnConvKind::kForward: diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc index c29aa31d4e..1792893ae4 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc @@ -135,15 +135,6 @@ std::vector GetAlgorithms(CudnnConvKind kind, break; } - // Remove any algorithms with tensor math enabled. These have lower precision - // than regular algorithms, and we don't yet have a way to turn this on/off in - // XLA. - algorithms.erase(std::remove_if(algorithms.begin(), algorithms.end(), - [&](const AlgorithmDesc& a) { - return a.tensor_ops_enabled(); - }), - algorithms.end()); - return algorithms; } @@ -222,6 +213,7 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( ShouldIncludeWinogradNonfusedAlgo(input_shape, output_shape, dnums); se::dnn::ProfileResult best_result; int64 best_result_bytes_used = 0; + for (const AlgorithmDesc& alg : GetAlgorithms(kind, use_winograd_nonfused, stream_exec_)) { ScratchAllocator scratch_allocator(device_ordinal, allocator); @@ -229,14 +221,12 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( VLOG(3) << "Trying algorithm " << AlgorithmToString(alg) << " for " << instr->ToString(); - bool launch_ok = - RunCudnnConvolution(kind, input_shape, filter_shape, output_shape, - se::DeviceMemory(input_buf.ValueOrDie()), - se::DeviceMemory(filter_buf.ValueOrDie()), - se::DeviceMemory(output_buf.ValueOrDie()), - &scratch_allocator, window, dnums, - AlgorithmConfig(alg), &stream, &profile_result) - .ok(); + bool launch_ok = RunCudnnConvolution( + kind, input_shape, filter_shape, output_shape, + input_buf.ValueOrDie(), filter_buf.ValueOrDie(), + output_buf.ValueOrDie(), &scratch_allocator, window, + dnums, AlgorithmConfig(alg), &stream, &profile_result) + .ok(); if (launch_ok && profile_result.is_valid()) { int64 scratch_bytes_used = scratch_allocator.TotalAllocatedBytes(); diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc index 81695a6c32..e4ae839e1d 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc @@ -70,39 +70,11 @@ class ScratchBufAllocator : public se::ScratchAllocator { bool allocated_ = false; }; -} // anonymous namespace - -string CudnnConvKindToString(CudnnConvKind kind) { - switch (kind) { - case CudnnConvKind::kForward: - return "forward"; - case CudnnConvKind::kBackwardFilter: - return "backward_filter"; - case CudnnConvKind::kBackwardInput: - return "backward_input"; - } -} - -Status RunCudnnConvolution(CudnnConvKind kind, const Shape& input_shape, - const Shape& filter_shape, const Shape& output_shape, - DeviceMemory input_buf, - DeviceMemory filter_buf, - DeviceMemory output_buf, - DeviceMemoryBase scratch_buf, const Window& window, - const ConvolutionDimensionNumbers& dnums, - AlgorithmConfig algorithm, Stream* stream, - ProfileResult* profile_result /*= nullptr*/) { - ScratchBufAllocator scratch_allocator(scratch_buf); - return RunCudnnConvolution(kind, input_shape, filter_shape, output_shape, - input_buf, filter_buf, output_buf, - &scratch_allocator, window, dnums, algorithm, - stream, profile_result); -} - +template Status RunCudnnConvolution( CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, - const Shape& output_shape, DeviceMemory input_buf, - DeviceMemory filter_buf, DeviceMemory output_buf, + const Shape& output_shape, DeviceMemory input_buf, + DeviceMemory filter_buf, DeviceMemory output_buf, se::ScratchAllocator* scratch_allocator, const Window& window, const ConvolutionDimensionNumbers& dnums, AlgorithmConfig algorithm, Stream* stream, ProfileResult* profile_result /*= nullptr*/) { @@ -124,8 +96,16 @@ Status RunCudnnConvolution( // tensorflow/python/ops/nn_ops.py). const int effective_num_dimensions = std::max(2, num_dimensions); - CHECK_EQ(F32, output_shape.element_type()) - << ShapeUtil::HumanString(output_shape); + if (std::is_same::value) { + CHECK_EQ(F32, output_shape.element_type()) + << ShapeUtil::HumanString(output_shape); + } else if (std::is_same::value) { + CHECK_EQ(F16, output_shape.element_type()) + << ShapeUtil::HumanString(output_shape); + } else { + LOG(FATAL) << ShapeUtil::HumanString(output_shape); + } + CHECK_EQ(num_dimensions, dnums.input_spatial_dimensions_size()); CHECK_EQ(num_dimensions, dnums.kernel_spatial_dimensions_size()); CHECK_EQ(num_dimensions, dnums.output_spatial_dimensions_size()); @@ -220,5 +200,63 @@ Status RunCudnnConvolution( return Status::OK(); } +} // anonymous namespace + +string CudnnConvKindToString(CudnnConvKind kind) { + switch (kind) { + case CudnnConvKind::kForward: + return "forward"; + case CudnnConvKind::kBackwardFilter: + return "backward_filter"; + case CudnnConvKind::kBackwardInput: + return "backward_input"; + } +} + +Status RunCudnnConvolution( + CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, + const Shape& output_shape, perftools::gputools::DeviceMemoryBase input_buf, + perftools::gputools::DeviceMemoryBase filter_buf, + perftools::gputools::DeviceMemoryBase output_buf, + perftools::gputools::DeviceMemoryBase scratch_buf, const Window& window, + const ConvolutionDimensionNumbers& dnums, + perftools::gputools::dnn::AlgorithmConfig algorithm, + perftools::gputools::Stream* stream, + perftools::gputools::dnn::ProfileResult* profile_result) { + ScratchBufAllocator scratch_allocator(scratch_buf); + return RunCudnnConvolution(kind, input_shape, filter_shape, output_shape, + input_buf, filter_buf, output_buf, + &scratch_allocator, window, dnums, algorithm, + stream, profile_result); +} + +Status RunCudnnConvolution( + CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, + const Shape& output_shape, perftools::gputools::DeviceMemoryBase input_buf, + perftools::gputools::DeviceMemoryBase filter_buf, + perftools::gputools::DeviceMemoryBase output_buf, + perftools::gputools::ScratchAllocator* scratch_allocator, + const Window& window, const ConvolutionDimensionNumbers& dnums, + perftools::gputools::dnn::AlgorithmConfig algorithm, + perftools::gputools::Stream* stream, + perftools::gputools::dnn::ProfileResult* profile_result) { + PrimitiveType output_primitive_type = output_shape.element_type(); + CHECK(output_primitive_type == F32 || output_primitive_type == F16) + << ShapeUtil::HumanString(output_shape); + if (output_primitive_type == F32) { + return RunCudnnConvolution( + kind, input_shape, filter_shape, output_shape, + se::DeviceMemory(input_buf), se::DeviceMemory(filter_buf), + se::DeviceMemory(output_buf), scratch_allocator, window, dnums, + algorithm, stream, profile_result); + } + return RunCudnnConvolution(kind, input_shape, filter_shape, output_shape, + se::DeviceMemory(input_buf), + se::DeviceMemory(filter_buf), + se::DeviceMemory(output_buf), + scratch_allocator, window, dnums, algorithm, + stream, profile_result); +} + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h index b101f76510..3dbfa2730d 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h @@ -55,7 +55,10 @@ string CudnnConvKindToString(CudnnConvKind kind); // Note that depending on the value of CudnnConvKind, the result of this call // may be written into input_buf, filter_buf, or output_buf! // -// At the moment we only support cudnn convolutions over floats. +// At the moment we only support cudnn convolutions over float and half, and +// convolution with half data type is implemented with cudnn PSEUDO_HALF +// configuration, that is, the input values are half and the internal +// computation type is float. // // We provide one overload which takes a scratch buffer, and another which takes // an allocator which is responsible for allocating the scratch space. In @@ -69,10 +72,9 @@ string CudnnConvKindToString(CudnnConvKind kind); // that size, if you like. Status RunCudnnConvolution( CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, - const Shape& output_shape, - perftools::gputools::DeviceMemory input_buf, - perftools::gputools::DeviceMemory filter_buf, - perftools::gputools::DeviceMemory output_buf, + const Shape& output_shape, perftools::gputools::DeviceMemoryBase input_buf, + perftools::gputools::DeviceMemoryBase filter_buf, + perftools::gputools::DeviceMemoryBase output_buf, perftools::gputools::DeviceMemoryBase scratch_buf, const Window& window, const ConvolutionDimensionNumbers& dnums, perftools::gputools::dnn::AlgorithmConfig algorithm, @@ -81,10 +83,9 @@ Status RunCudnnConvolution( Status RunCudnnConvolution( CudnnConvKind kind, const Shape& input_shape, const Shape& filter_shape, - const Shape& output_shape, - perftools::gputools::DeviceMemory input_buf, - perftools::gputools::DeviceMemory filter_buf, - perftools::gputools::DeviceMemory output_buf, + const Shape& output_shape, perftools::gputools::DeviceMemoryBase input_buf, + perftools::gputools::DeviceMemoryBase filter_buf, + perftools::gputools::DeviceMemoryBase output_buf, perftools::gputools::ScratchAllocator* scratch_allocator, const Window& window, const ConvolutionDimensionNumbers& dnums, perftools::gputools::dnn::AlgorithmConfig algorithm, diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 81212cda42..eb6e9feb7c 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1403,6 +1403,11 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); break; } + case F16: { + TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], + MapImpl(map)); + break; + } case F32: { TF_ASSIGN_OR_RETURN(parent_->evaluated_[map], MapImpl(map)); break; @@ -2041,9 +2046,7 @@ HloEvaluator::HloEvaluator() { }); typed_visitors_[S32] = MakeUnique>(this); typed_visitors_[S64] = MakeUnique>(this); - typed_visitors_[F16] = MakeUnique([](HloInstruction*) { - return Unimplemented("HloEvaluator: unhandled primitive type: F16."); - }); + typed_visitors_[F16] = MakeUnique>(this); typed_visitors_[F32] = MakeUnique>(this); typed_visitors_[F64] = MakeUnique>(this); typed_visitors_[C64] = MakeUnique>(this); diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 0ceb9aff37..1385b437fc 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -53,157 +53,200 @@ class ConvolutionTest : public ClientLibraryTestBase { #endif }; -XLA_TEST_F(ConvolutionTest, ForwardPassConvolution_3x3x256_256_OutputZ_Iota) { - const int kInputActivationSizeY = 3; - const int kInputActivationSizeX = 3; - const int kInputActivationSizeZ = 256; - const int kKernelSizeX = 2; - const int kKernelSizeY = 2; - const int kOutputActivationSizeZ = 256; - const int kMiniBatchSize = 4; - auto alhs = - MakeUnique>(kMiniBatchSize, kInputActivationSizeZ, - kInputActivationSizeY, kInputActivationSizeX); - alhs->FillWithMultiples(1.0f); - ASSERT_EQ(3, alhs->width()); - ASSERT_EQ(3, alhs->height()); - - auto arhs = - MakeUnique>(kOutputActivationSizeZ, kInputActivationSizeZ, - kKernelSizeY, kKernelSizeX); - Array2D rhs_raster({ - {1.0f, 0.0f}, // row 0 - {0.0f, 0.0f}, // row 1 - }); - arhs->FillWithYX(rhs_raster); - ASSERT_EQ(2, arhs->width()); - ASSERT_EQ(2, arhs->height()); +// TODO(b/72509305): Enable half data type tests for CPU +#if (XLA_TEST_BACKEND_GPU) +using TestTypes = ::testing::Types; +#else +using TestTypes = ::testing::Types; +#endif - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR4FromArray4D(*alhs); - auto rhs = builder.ConstantR4FromArray4D(*arhs); - auto conv = builder.Conv(lhs, rhs, {1, 1}, Padding::kValid); +template +Shape MakeShapeWrapper(tensorflow::gtl::ArraySlice dimensions); - ComputeAndCompare(&builder, conv, {}, error_spec_); +template <> +Shape MakeShapeWrapper(tensorflow::gtl::ArraySlice dimensions) { + return ShapeUtil::MakeShape(F32, dimensions); } -TEST_F(ConvolutionTest, Convolve_1x1x1x2_1x1x1x2_Valid) { - ComputationBuilder builder(client_, TestName()); - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); +template <> +Shape MakeShapeWrapper( + tensorflow::gtl::ArraySlice dimensions) { + return ShapeUtil::MakeShape(F16, dimensions); +} - Array4D input_data(1, 1, 1, 2); - input_data.FillWithYX(Array2D({ - {1, 2}, - })); - Array4D filter_data(1, 1, 1, 2); - filter_data.FillWithYX(Array2D({ - {5, 6}, - })); +template +class ForwardPassConvolution_3x3x256_256_OutputZ_Iota : public ConvolutionTest { + public: + void RunTest() { + const int kInputActivationSizeY = 3; + const int kInputActivationSizeX = 3; + const int kInputActivationSizeZ = 256; + const int kKernelSizeX = 2; + const int kKernelSizeY = 2; + const int kOutputActivationSizeZ = 256; + const int kMiniBatchSize = 4; + auto alhs = + MakeUnique>(kMiniBatchSize, kInputActivationSizeZ, + kInputActivationSizeY, kInputActivationSizeX); + alhs->FillWithMultiples(static_cast(1.0f)); + ASSERT_EQ(3, alhs->width()); + ASSERT_EQ(3, alhs->height()); + + auto arhs = + MakeUnique>(kOutputActivationSizeZ, kInputActivationSizeZ, + kKernelSizeY, kKernelSizeX); + Array2D rhs_raster({ + {1.0f, 0.0f}, // row 0 + {0.0f, 0.0f}, // row 1 + }); + arhs->FillWithYX(rhs_raster); + ASSERT_EQ(2, arhs->width()); + ASSERT_EQ(2, arhs->height()); + + ComputationBuilder builder(client_, TestName()); + auto lhs = builder.ConstantR4FromArray4D(*alhs); + auto rhs = builder.ConstantR4FromArray4D(*arhs); + auto conv = builder.Conv(lhs, rhs, {1, 1}, Padding::kValid); + + ComputeAndCompare(&builder, conv, {}, error_spec_); + } +}; - ComputeAndCompare(&builder, conv, - {std::move(*Literal::CreateFromArray(input_data)), - std::move(*Literal::CreateFromArray(filter_data))}, - error_spec_); +TYPED_TEST_CASE(ForwardPassConvolution_3x3x256_256_OutputZ_Iota, TestTypes); +XLA_TYPED_TEST(ForwardPassConvolution_3x3x256_256_OutputZ_Iota, Types) { + this->RunTest(); } +template +class Convolve_1x1x1x2_1x1x1x2_Valid : public ConvolutionTest { + public: + void RunTest() { + ComputationBuilder builder(client_, TestName()); + Shape input_shape = MakeShapeWrapper({1, 1, 1, 2}); + Shape filter_shape = MakeShapeWrapper({1, 1, 1, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D input_data(1, 1, 1, 2); + input_data.FillWithYX(Array2D({ + {1.0f, 2.0f}, + })); + Array4D filter_data(1, 1, 1, 2); + filter_data.FillWithYX(Array2D({ + {5.0f, 6.0f}, + })); + + ComputeAndCompare(&builder, conv, + {std::move(*Literal::CreateFromArray(input_data)), + std::move(*Literal::CreateFromArray(filter_data))}, + error_spec_); + } +}; + +TYPED_TEST_CASE(Convolve_1x1x1x2_1x1x1x2_Valid, TestTypes); +TYPED_TEST(Convolve_1x1x1x2_1x1x1x2_Valid, Types) { this->RunTest(); } + // Tests valid padding for 2D convolution in raster space. -TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Valid) { - ComputationBuilder builder(client_, TestName()); - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); +template +class Convolve_1x1x4x4_1x1x2x2_Valid : public ConvolutionTest { + public: + void RunTest() { + ComputationBuilder builder(client_, TestName()); + Shape input_shape = MakeShapeWrapper({1, 1, 4, 4}); + Shape filter_shape = MakeShapeWrapper({1, 1, 2, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); + + Array4D input_data(1, 1, 4, 4); + input_data.FillWithYX(Array2D({ + {1.0f, 2.0f, 3.0f, 4.0f}, + {5.0f, 6.0f, 7.0f, 8.0f}, + {9.0f, 10.0f, 11.0f, 12.0f}, + {13.0f, 14.0f, 15.0f, 16.0f}, + })); + Array4D filter_data(1, 1, 2, 2); + filter_data.FillWithYX(Array2D({ + {5.0f, 6.0f}, + {7.0f, 8.0f}, + })); + ComputeAndCompare(&builder, conv, + {std::move(*Literal::CreateFromArray(input_data)), + std::move(*Literal::CreateFromArray(filter_data))}, + error_spec_); + } +}; - Array4D input_data(1, 1, 4, 4); - // clang-format off - input_data.FillWithYX(Array2D({ - {1, 2, 3, 4 }, - {5, 6, 7, 8 }, - {9, 10, 11, 12}, - {13, 14, 15, 16}, - })); - // clang-format on - Array4D filter_data(1, 1, 2, 2); - // clang-format off - filter_data.FillWithYX(Array2D({ - {5, 6}, - {7, 8}, - })); - // clang-format on - ComputeAndCompare(&builder, conv, - {std::move(*Literal::CreateFromArray(input_data)), - std::move(*Literal::CreateFromArray(filter_data))}, - error_spec_); -} +TYPED_TEST_CASE(Convolve_1x1x4x4_1x1x2x2_Valid, TestTypes); +TYPED_TEST(Convolve_1x1x4x4_1x1x2x2_Valid, Types) { this->RunTest(); } // Tests same padding for 2D convolution in raster space. -TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x2x2_Same) { - ComputationBuilder builder(client_, TestName()); - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 2, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); - - Array4D input_data(1, 1, 4, 4); - // clang-format off - input_data.FillWithYX(Array2D({ - {1, 2, 3, 4 }, - {5, 6, 7, 8 }, - {9, 10, 11, 12}, - {13, 14, 15, 16}, - })); - // clang-format on - Array4D filter_data(1, 1, 2, 2); - // clang-format off - filter_data.FillWithYX(Array2D({ - {5, 6}, - {7, 8}, - })); - // clang-format on - ComputeAndCompare(&builder, conv, - {std::move(*Literal::CreateFromArray(input_data)), - std::move(*Literal::CreateFromArray(filter_data))}, - error_spec_); -} +template +class Convolve_1x1x4x4_1x1x2x2_Same : public ConvolutionTest { + public: + void RunTest() { + ComputationBuilder builder(client_, TestName()); + Shape input_shape = MakeShapeWrapper({1, 1, 4, 4}); + Shape filter_shape = MakeShapeWrapper({1, 1, 2, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); + + Array4D input_data(1, 1, 4, 4); + input_data.FillWithYX(Array2D({ + {1.0f, 2.0f, 3.0f, 4.0f}, + {5.0f, 6.0f, 7.0f, 8.0f}, + {9.0f, 10.0f, 11.0f, 12.0f}, + {13.0f, 14.0f, 15.0f, 16.0f}, + })); + Array4D filter_data(1, 1, 2, 2); + filter_data.FillWithYX(Array2D({ + {5.0f, 6.0f}, + {7.0f, 8.0f}, + })); + + ComputeAndCompare(&builder, conv, + {std::move(*Literal::CreateFromArray(input_data)), + std::move(*Literal::CreateFromArray(filter_data))}, + error_spec_); + } +}; + +TYPED_TEST_CASE(Convolve_1x1x4x4_1x1x2x2_Same, TestTypes); +TYPED_TEST(Convolve_1x1x4x4_1x1x2x2_Same, Types) { this->RunTest(); } // Tests same padding for 2D convolution in raster space with an odd sized // kernel. -TEST_F(ConvolutionTest, Convolve_1x1x4x4_1x1x3x3_Same) { - ComputationBuilder builder(client_, TestName()); - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 4, 4}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 1, 3, 3}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); - - Array4D input_data(1, 1, 4, 4); - // clang-format off - input_data.FillWithYX(Array2D({ - {1, 2, 3, 4 }, - {5, 6, 7, 8 }, - {9, 10, 11, 12}, - {13, 14, 15, 16}, - })); - // clang-format on - Array4D filter_data(1, 1, 3, 3); - // clang-format off - filter_data.FillWithYX(Array2D({ - { 5, 6, 7}, - { 8, 9, 10}, - {11, 12, 13}, - })); - // clang-format on - ComputeAndCompare(&builder, conv, - {std::move(*Literal::CreateFromArray(input_data)), - std::move(*Literal::CreateFromArray(filter_data))}, - error_spec_); -} +template +class Convolve_1x1x4x4_1x1x3x3_Same : public ConvolutionTest { + public: + void RunTest() { + ComputationBuilder builder(client_, TestName()); + Shape input_shape = MakeShapeWrapper({1, 1, 4, 4}); + Shape filter_shape = MakeShapeWrapper({1, 1, 3, 3}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); + + Array4D input_data(1, 1, 4, 4); + input_data.FillWithYX(Array2D({{1.0f, 2.0f, 3.0f, 4.0f}, + {5.0f, 6.0f, 7.0f, 8.0f}, + {9.0f, 10.0f, 11.0f, 12.0f}, + {13.0f, 14.0f, 15.0f, 16.0f}})); + Array4D filter_data(1, 1, 3, 3); + filter_data.FillWithYX(Array2D( + {{5.0f, 6.0f, 7.0f}, {8.0f, 9.0f, 10.0f}, {11.0f, 12.0f, 13.0f}})); + // clang-format on + ComputeAndCompare(&builder, conv, + {std::move(*Literal::CreateFromArray(input_data)), + std::move(*Literal::CreateFromArray(filter_data))}, + error_spec_); + } +}; + +TYPED_TEST_CASE(Convolve_1x1x4x4_1x1x3x3_Same, TestTypes); +TYPED_TEST(Convolve_1x1x4x4_1x1x3x3_Same, Types) { this->RunTest(); } XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_Valid) { ComputationBuilder builder(client_, TestName()); @@ -232,36 +275,44 @@ XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_Valid) { error_spec_); } -XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithRHSDilation) { - ComputationBuilder builder(client_, TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - // Convolution dimensions are bf0_oi0->bo0. - builder.ConvGeneralDilated( - input, filter, /*window_strides=*/{1}, /*padding=*/{{0, 0}}, - /*lhs_dilation=*/{1}, /*rhs_dilation=*/{2}, - /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); +template +class Convolve1D_1x2x5_1x2x2_WithRHSDilation : public ConvolutionTest { + public: + void RunTest() { + ComputationBuilder builder(client_, TestName()); + { + Shape input_shape = MakeShapeWrapper({1, 2, 5}); + Shape filter_shape = MakeShapeWrapper({1, 2, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + // Convolution dimensions are bf0_oi0->bo0. + builder.ConvGeneralDilated( + input, filter, /*window_strides=*/{1}, /*padding=*/{{0, 0}}, + /*lhs_dilation=*/{1}, /*rhs_dilation=*/{2}, + /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); + } + + Array3D input( + {{{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {6.0f, 7.0f, 8.0f, 9.0f, 10.0f}}}); + Array3D filter({{{10.0f, 20.0f}, {30.0f, 40.0f}}}); + + Array3D expected({{{570.0f, 670.0f, 770.0f}}}); + + auto input_literal = + client_->TransferToServer(*Literal::CreateR3FromArray3D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(*Literal::CreateR3FromArray3D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR3(&builder, expected, + {input_literal.get(), filter_literal.get()}, + error_spec_); } +}; // namespace - Array3D input({{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}}); - Array3D filter({{{10, 20}, {30, 40}}}); - - Array3D expected({{{570, 670, 770}}}); - - auto input_literal = - client_->TransferToServer(*Literal::CreateR3FromArray3D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*Literal::CreateR3FromArray3D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR3(&builder, expected, - {input_literal.get(), filter_literal.get()}, - error_spec_); -} +TYPED_TEST_CASE(Convolve1D_1x2x5_1x2x2_WithRHSDilation, TestTypes); +TYPED_TEST(Convolve1D_1x2x5_1x2x2_WithRHSDilation, Types) { this->RunTest(); } XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithLHSDilation) { ComputationBuilder builder(client_, TestName()); @@ -325,36 +376,45 @@ XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithLHSAndRHSDilation) { error_spec_); } -XLA_TEST_F(ConvolutionTest, Convolve1D_1x2x5_1x2x2_WithPadding) { - ComputationBuilder builder(client_, TestName()); - { - Shape input_shape = ShapeUtil::MakeShape(F32, {1, 2, 5}); - Shape filter_shape = ShapeUtil::MakeShape(F32, {1, 2, 2}); - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - // Convolution dimensions are bf0_oi0->bo0. - builder.ConvGeneralDilated( - input, filter, /*window_strides=*/{1}, /*padding=*/{{2, 2}}, - /*lhs_dilation=*/{1}, /*rhs_dilation=*/{1}, - /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); +template +class Convolve1D_1x2x5_1x2x2_WithPadding : public ConvolutionTest { + public: + void RunTest() { + ComputationBuilder builder(client_, TestName()); + { + Shape input_shape = MakeShapeWrapper({1, 2, 5}); + Shape filter_shape = MakeShapeWrapper({1, 2, 2}); + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + // Convolution dimensions are bf0_oi0->bo0. + builder.ConvGeneralDilated( + input, filter, /*window_strides=*/{1}, /*padding=*/{{2, 2}}, + /*lhs_dilation=*/{1}, /*rhs_dilation=*/{1}, + /*dimension_numbers=*/builder.CreateDefaultConvDimensionNumbers(1)); + } + + Array3D input( + {{{1.0f, 2.0f, 3.0f, 4.0f, 5.0f}, {6.0f, 7.0f, 8.0f, 9.0f, 10.0f}}}); + Array3D filter({{{10.0f, 20.0f}, {30.0f, 40.0f}}}); + + Array3D expected( + {{{0.0f, 260.0f, 510.0f, 610.0f, 710.0f, 810.0f, 350.0f, 0.0f}}}); + + auto input_literal = + client_->TransferToServer(*Literal::CreateR3FromArray3D(input)) + .ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(*Literal::CreateR3FromArray3D(filter)) + .ConsumeValueOrDie(); + + ComputeAndCompareR3(&builder, expected, + {input_literal.get(), filter_literal.get()}, + error_spec_); } +}; - Array3D input({{{1, 2, 3, 4, 5}, {6, 7, 8, 9, 10}}}); - Array3D filter({{{10, 20}, {30, 40}}}); - - Array3D expected({{{0, 260, 510, 610, 710, 810, 350, 0}}}); - - auto input_literal = - client_->TransferToServer(*Literal::CreateR3FromArray3D(input)) - .ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*Literal::CreateR3FromArray3D(filter)) - .ConsumeValueOrDie(); - - ComputeAndCompareR3(&builder, expected, - {input_literal.get(), filter_literal.get()}, - error_spec_); -} +TYPED_TEST_CASE(Convolve1D_1x2x5_1x2x2_WithPadding, TestTypes); +TYPED_TEST(Convolve1D_1x2x5_1x2x2_WithPadding, Types) { this->RunTest(); } XLA_TEST_F(ConvolutionTest, Convolve3D_1x4x2x3x3_2x2x2x3x3_Valid) { ComputationBuilder builder(client_, TestName()); @@ -389,12 +449,12 @@ XLA_TEST_F(ConvolutionTest, Convolve3D_1x4x2x3x3_2x2x2x3x3_Valid) { } std::vector input_elems(ShapeUtil::ElementsIn(input_shape)); - std::iota(input_elems.begin(), input_elems.end(), 1.0f); + iota(input_elems.begin(), input_elems.end(), 1.0f); auto input_r1 = Literal::CreateR1(input_elems); auto input_r5 = input_r1->Reshape(input_dims).ConsumeValueOrDie(); std::vector filter_elems(ShapeUtil::ElementsIn(filter_shape)); - std::iota(filter_elems.begin(), filter_elems.end(), 1.0f); + iota(filter_elems.begin(), filter_elems.end(), 1.0f); auto filter_r1 = Literal::CreateR1(filter_elems); auto filter_r5 = filter_r1->Reshape(filter_dims).ConsumeValueOrDie(); @@ -412,56 +472,73 @@ XLA_TEST_F(ConvolutionTest, Convolve3D_1x4x2x3x3_2x2x2x3x3_Valid) { error_spec_); } -XLA_TEST_F(ConvolutionTest, Convolve2D_1x3x3x5_3x3x5x5_Valid) { - ComputationBuilder builder(client_, TestName()); - std::vector input_dims = {1, 3, 3, 5}; - std::vector filter_dims = {3, 3, 5, 3}; - Shape input_shape = ShapeUtil::MakeShape(F32, input_dims); - Shape filter_shape = ShapeUtil::MakeShape(F32, filter_dims); - { - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - - // Tensorflow dimension numbers for 2D convolution. - ConvolutionDimensionNumbers dnums; - dnums.set_input_batch_dimension(0); - dnums.set_output_batch_dimension(0); - dnums.add_input_spatial_dimensions(1); - dnums.add_output_spatial_dimensions(1); - dnums.add_input_spatial_dimensions(2); - dnums.add_output_spatial_dimensions(2); - dnums.set_input_feature_dimension(3); - dnums.set_output_feature_dimension(3); - dnums.add_kernel_spatial_dimensions(0); - dnums.add_kernel_spatial_dimensions(1); - dnums.set_kernel_input_feature_dimension(2); - dnums.set_kernel_output_feature_dimension(3); +// std::iota doesn't work when init_value has a type Eigen::half in some build +// servers. The error message is missing the operator ++. +template +void iota_int_init_value(std::vector& values, int init_value) { + std::for_each(values.begin(), values.end(), + [&](T& value) { value = static_cast(init_value++); }); +} - builder.ConvWithGeneralDimensions(input, filter, {1, 1}, Padding::kValid, - dnums); +template +class Convolve2D_1x3x3x5_3x3x5x5_Valid : public ConvolutionTest { + public: + void RunTest() { + ComputationBuilder builder(client_, TestName()); + std::vector input_dims = {1, 3, 3, 5}; + std::vector filter_dims = {3, 3, 5, 3}; + Shape input_shape = MakeShapeWrapper(input_dims); + Shape filter_shape = MakeShapeWrapper(filter_dims); + { + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + + // Tensorflow dimension numbers for 2D convolution. + ConvolutionDimensionNumbers dnums; + dnums.set_input_batch_dimension(0); + dnums.set_output_batch_dimension(0); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.add_input_spatial_dimensions(2); + dnums.add_output_spatial_dimensions(2); + dnums.set_input_feature_dimension(3); + dnums.set_output_feature_dimension(3); + dnums.add_kernel_spatial_dimensions(0); + dnums.add_kernel_spatial_dimensions(1); + dnums.set_kernel_input_feature_dimension(2); + dnums.set_kernel_output_feature_dimension(3); + + builder.ConvWithGeneralDimensions(input, filter, {1, 1}, Padding::kValid, + dnums); + } + + std::vector input_elems(ShapeUtil::ElementsIn(input_shape)); + iota_int_init_value(input_elems, 1); + auto input_r1 = Literal::CreateR1(input_elems); + auto input_r4 = input_r1->Reshape(input_dims).ConsumeValueOrDie(); + + std::vector filter_elems(ShapeUtil::ElementsIn(filter_shape)); + iota_int_init_value(filter_elems, 1); + auto filter_r1 = Literal::CreateR1(filter_elems); + auto filter_r4 = filter_r1->Reshape(filter_dims).ConsumeValueOrDie(); + + auto expected_r1 = Literal::CreateR1( + {static_cast(92115), static_cast(93150), static_cast(94185)}); + auto expected_r4 = expected_r1->Reshape({1, 1, 1, 3}).ConsumeValueOrDie(); + + auto input_literal = + client_->TransferToServer(*input_r4).ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(*filter_r4).ConsumeValueOrDie(); + + ComputeAndCompareLiteral(&builder, *expected_r4, + {input_literal.get(), filter_literal.get()}, + error_spec_); } +}; - std::vector input_elems(ShapeUtil::ElementsIn(input_shape)); - std::iota(input_elems.begin(), input_elems.end(), 1.0f); - auto input_r1 = Literal::CreateR1(input_elems); - auto input_r4 = input_r1->Reshape(input_dims).ConsumeValueOrDie(); - - std::vector filter_elems(ShapeUtil::ElementsIn(filter_shape)); - std::iota(filter_elems.begin(), filter_elems.end(), 1.0f); - auto filter_r1 = Literal::CreateR1(filter_elems); - auto filter_r4 = filter_r1->Reshape(filter_dims).ConsumeValueOrDie(); - - auto expected_r1 = Literal::CreateR1({92115, 93150, 94185}); - auto expected_r4 = expected_r1->Reshape({1, 1, 1, 3}).ConsumeValueOrDie(); - - auto input_literal = client_->TransferToServer(*input_r4).ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*filter_r4).ConsumeValueOrDie(); - - ComputeAndCompareLiteral(&builder, *expected_r4, - {input_literal.get(), filter_literal.get()}, - error_spec_); -} +TYPED_TEST_CASE(Convolve2D_1x3x3x5_3x3x5x5_Valid, TestTypes); +TYPED_TEST(Convolve2D_1x3x3x5_3x3x5x5_Valid, Types) { this->RunTest(); } // Test fixture to run convolution tests with and without convolution // canonicalization enabled. @@ -519,67 +596,117 @@ struct Convolve1DTestParam { int64 num_windows; }; -class Convolve1D1WindowTest +class Convolve1D1WindowTestBase : public ConvolutionTest, - public ::testing::WithParamInterface {}; - -XLA_TEST_P(Convolve1D1WindowTest, Convolve1D1Window) { - ComputationBuilder builder(client_, TestName()); - int64 input_feature = GetParam().input_feature; - int64 output_feature = GetParam().output_feature; - int64 batch = GetParam().batch; - int64 num_windows = GetParam().num_windows; - int64 window_size = GetParam().window_size; - std::vector input_dims = {batch, window_size + num_windows - 1, - input_feature}; - std::vector filter_dims = {window_size, input_feature, output_feature}; - Shape input_shape = ShapeUtil::MakeShape(F32, input_dims); - Shape filter_shape = ShapeUtil::MakeShape(F32, filter_dims); - { - auto input = builder.Parameter(0, input_shape, "input"); - auto filter = builder.Parameter(1, filter_shape, "filter"); - - // Tensorflow dimension numbers for 1D convolution. - ConvolutionDimensionNumbers dnums; - dnums.set_input_batch_dimension(0); - dnums.set_output_batch_dimension(0); - dnums.add_input_spatial_dimensions(1); - dnums.add_output_spatial_dimensions(1); - dnums.set_input_feature_dimension(2); - dnums.set_output_feature_dimension(2); - dnums.add_kernel_spatial_dimensions(0); - dnums.set_kernel_input_feature_dimension(1); - dnums.set_kernel_output_feature_dimension(2); - - builder.ConvWithGeneralDimensions(input, filter, {1}, Padding::kValid, - dnums); + public ::testing::WithParamInterface { + protected: + template + void TestImpl() { + ComputationBuilder builder(client_, TestName()); + int64 input_feature = GetParam().input_feature; + int64 output_feature = GetParam().output_feature; + int64 batch = GetParam().batch; + int64 num_windows = GetParam().num_windows; + int64 window_size = GetParam().window_size; + std::vector input_dims = {batch, window_size + num_windows - 1, + input_feature}; + std::vector filter_dims = {window_size, input_feature, + output_feature}; + Shape input_shape = MakeShapeWrapper(input_dims); + Shape filter_shape = MakeShapeWrapper(filter_dims); + { + auto input = builder.Parameter(0, input_shape, "input"); + auto filter = builder.Parameter(1, filter_shape, "filter"); + + // Tensorflow dimension numbers for 1D convolution. + ConvolutionDimensionNumbers dnums; + dnums.set_input_batch_dimension(0); + dnums.set_output_batch_dimension(0); + dnums.add_input_spatial_dimensions(1); + dnums.add_output_spatial_dimensions(1); + dnums.set_input_feature_dimension(2); + dnums.set_output_feature_dimension(2); + dnums.add_kernel_spatial_dimensions(0); + dnums.set_kernel_input_feature_dimension(1); + dnums.set_kernel_output_feature_dimension(2); + + builder.ConvWithGeneralDimensions(input, filter, {1}, Padding::kValid, + dnums); + } + + std::vector input_elems(ShapeUtil::ElementsIn(input_shape), + static_cast(1.0f)); + auto input_r1 = Literal::CreateR1(input_elems); + auto input_r3 = input_r1->Reshape(input_dims).ConsumeValueOrDie(); + + std::vector filter_elems(ShapeUtil::ElementsIn(filter_shape), + static_cast(1.0f)); + + auto filter_r1 = Literal::CreateR1(filter_elems); + auto filter_r3 = filter_r1->Reshape(filter_dims).ConsumeValueOrDie(); + + std::vector expect_elems(batch * output_feature * num_windows, + static_cast(window_size * input_feature)); + auto expected_r1 = Literal::CreateR1(expect_elems); + auto expected_r3 = + expected_r1->Reshape({batch, num_windows, output_feature}) + .ConsumeValueOrDie(); + + auto input_literal = + client_->TransferToServer(*input_r3).ConsumeValueOrDie(); + auto filter_literal = + client_->TransferToServer(*filter_r3).ConsumeValueOrDie(); + ComputeAndCompareLiteral(&builder, *expected_r3, + {input_literal.get(), filter_literal.get()}, + error_spec_); } +}; - std::vector input_elems(ShapeUtil::ElementsIn(input_shape), 1.0); - auto input_r1 = Literal::CreateR1(input_elems); - auto input_r3 = input_r1->Reshape(input_dims).ConsumeValueOrDie(); +class Convolve1D1WindowTestFloat : public Convolve1D1WindowTestBase {}; - std::vector filter_elems(ShapeUtil::ElementsIn(filter_shape), 1.0); +XLA_TEST_P(Convolve1D1WindowTestFloat, Convolve1D1Window) { TestImpl(); } - auto filter_r1 = Literal::CreateR1(filter_elems); - auto filter_r3 = filter_r1->Reshape(filter_dims).ConsumeValueOrDie(); +INSTANTIATE_TEST_CASE_P( + Convolve1D1WindowTest_Instantiation, Convolve1D1WindowTestFloat, + ::testing::Values(Convolve1DTestParam{1, 1, 1, 1, 2}, + Convolve1DTestParam{160, 1, 1, 5, 1}, + Convolve1DTestParam{24, 1, 1, 20, 1}, + Convolve1DTestParam{30, 1, 1, 20, 1}, + Convolve1DTestParam{23, 1, 1, 20, 20}, + Convolve1DTestParam{25, 1, 1, 20, 1}, + Convolve1DTestParam{24, 1, 1, 10, 5}, + Convolve1DTestParam{160, 1, 1, 10, 1}, + Convolve1DTestParam{255, 1, 1, 3, 1}, + Convolve1DTestParam{130, 1, 1, 1, 3}, + Convolve1DTestParam{64, 1, 1, 1, 1}, + Convolve1DTestParam{128, 1, 1, 1, 1}, + Convolve1DTestParam{139, 1, 1, 128, 1}, + Convolve1DTestParam{1, 10, 10, 1, 10}, + Convolve1DTestParam{1, 10, 130, 1, 2}, + Convolve1DTestParam{1, 10, 130, 1, 1}, + Convolve1DTestParam{1, 64, 64, 1, 10}, + Convolve1DTestParam{1, 65, 65, 1, 1}, + Convolve1DTestParam{1, 128, 128, 1, 1}, + Convolve1DTestParam{128, 128, 128, 128, 1}, + Convolve1DTestParam{1, 128, 128, 1, 1}, + Convolve1DTestParam{2, 2, 2, 2, 1}, + Convolve1DTestParam{161, 1, 1, 10, 1}, + Convolve1DTestParam{900, 1, 1, 10, 1}, + Convolve1DTestParam{640, 3, 3, 128, 1}) - std::vector expect_elems(batch * output_feature * num_windows, - window_size * input_feature); - auto expected_r1 = Literal::CreateR1(expect_elems); - auto expected_r3 = expected_r1->Reshape({batch, num_windows, output_feature}) - .ConsumeValueOrDie(); +); - auto input_literal = client_->TransferToServer(*input_r3).ConsumeValueOrDie(); - auto filter_literal = - client_->TransferToServer(*filter_r3).ConsumeValueOrDie(); - ComputeAndCompareLiteral(&builder, *expected_r3, - {input_literal.get(), filter_literal.get()}, - error_spec_); +#if (XLA_TEST_BACKEND_GPU || XLA_TEST_BACKEND_CPU) +class Convolve1D1WindowTestHalf : public Convolve1D1WindowTestBase {}; + +// TODO(b/72509305): Enable half data type tests for CPU. +XLA_TEST_P(Convolve1D1WindowTestHalf, + DISABLED_ON_CPU_PARALLEL(DISABLED_ON_CPU(Convolve1D1Window))) { + TestImpl(); } INSTANTIATE_TEST_CASE_P( - Convolve1D1WindowTest_Instantiation, Convolve1D1WindowTest, + Convolve1D1WindowTest_Instantiation, Convolve1D1WindowTestHalf, ::testing::Values(Convolve1DTestParam{1, 1, 1, 1, 2}, Convolve1DTestParam{160, 1, 1, 5, 1}, Convolve1DTestParam{24, 1, 1, 20, 1}, @@ -592,7 +719,11 @@ INSTANTIATE_TEST_CASE_P( Convolve1DTestParam{130, 1, 1, 1, 3}, Convolve1DTestParam{64, 1, 1, 1, 1}, Convolve1DTestParam{128, 1, 1, 1, 1}, + // TODO(b/72566306): the following three tests fail on CPU + // backend due to result miscompare. Convolve1DTestParam{139, 1, 1, 128, 1}, + Convolve1DTestParam{640, 3, 3, 128, 1}, + Convolve1DTestParam{900, 1, 1, 10, 1}, Convolve1DTestParam{1, 10, 10, 1, 10}, Convolve1DTestParam{1, 10, 130, 1, 2}, Convolve1DTestParam{1, 10, 130, 1, 1}, @@ -602,11 +733,10 @@ INSTANTIATE_TEST_CASE_P( Convolve1DTestParam{128, 128, 128, 128, 1}, Convolve1DTestParam{1, 128, 128, 1, 1}, Convolve1DTestParam{2, 2, 2, 2, 1}, - Convolve1DTestParam{161, 1, 1, 10, 1}, - Convolve1DTestParam{900, 1, 1, 10, 1}, - Convolve1DTestParam{640, 3, 3, 128, 1}) + Convolve1DTestParam{161, 1, 1, 10, 1}) ); +#endif TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { ComputationBuilder builder(client_, TestName()); diff --git a/tensorflow/compiler/xla/tests/test_macros.h b/tensorflow/compiler/xla/tests/test_macros.h index cc4eaf62f5..e2d406f66d 100644 --- a/tensorflow/compiler/xla/tests/test_macros.h +++ b/tensorflow/compiler/xla/tests/test_macros.h @@ -161,4 +161,31 @@ string PrependDisabledIfIndicated(const string& test_case_name, #define XLA_TEST_P(test_case_name, test_name) \ XLA_TEST_P_IMPL_(test_case_name, test_name) + +// This is identical to the TEST_F macro from "gtest", but it potentially +// disables the test based on an external manifest file, DISABLED_MANIFEST. +#define XLA_TYPED_TEST(CaseName, TestName) \ + template \ + class GTEST_TEST_CLASS_NAME_(CaseName, TestName) \ + : public CaseName { \ + private: \ + typedef CaseName TestFixture; \ + typedef gtest_TypeParam_ TypeParam; \ + virtual void TestBody(); \ + }; \ + bool gtest_##CaseName##_##TestName##_registered_ GTEST_ATTRIBUTE_UNUSED_ = \ + ::testing::internal::TypeParameterizedTest< \ + CaseName, \ + ::testing::internal::TemplateSel, \ + GTEST_TYPE_PARAMS_(CaseName)>:: \ + Register( \ + "", ::testing::internal::CodeLocation(__FILE__, __LINE__), \ + #CaseName, \ + ::xla::PrependDisabledIfIndicated(#CaseName, #TestName).c_str(), \ + 0); \ + template \ + void GTEST_TEST_CLASS_NAME_(CaseName, \ + TestName)::TestBody() + #endif // TENSORFLOW_COMPILER_XLA_TESTS_TEST_MACROS_H_ -- GitLab From 18bb356e5c0d61f5c0df78ba2948a26165048eb6 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Thu, 15 Feb 2018 11:17:32 -0800 Subject: [PATCH 222/629] Indentation fix --- tensorflow/python/ops/image_ops_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index d8d37b282f..e0dca9e407 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -1121,9 +1121,8 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): p_unknown_dims_4 = array_ops.placeholder( dtypes.uint8, shape=[None, None, None, None]) p_unknown_width = array_ops.placeholder(dtypes.uint8, shape=[64, None, 3]) - p_unknown_batch = array_ops.placeholder(dtypes.uint8, - shape=[None, 64, 64, 3]) - + p_unknown_batch = array_ops.placeholder(dtypes.uint8, + shape=[None, 64, 64, 3]) p_wrong_rank = array_ops.placeholder(dtypes.uint8, shape=[None, None]) p_zero_dim = array_ops.placeholder(dtypes.uint8, shape=[64, 0, 3]) -- GitLab From 0e95ea86e671344259751469b030045e322d503d Mon Sep 17 00:00:00 2001 From: Sandeep N Gupta <32845615+sandeepngupta@users.noreply.github.com> Date: Thu, 15 Feb 2018 11:19:10 -0800 Subject: [PATCH 223/629] Updated roadmap --- tensorflow/docs_src/about/roadmap.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/docs_src/about/roadmap.md b/tensorflow/docs_src/about/roadmap.md index ce9e619b10..1f934acab6 100644 --- a/tensorflow/docs_src/about/roadmap.md +++ b/tensorflow/docs_src/about/roadmap.md @@ -1,5 +1,5 @@ # Roadmap -**Last updated: Feb 12, 2018** +**Last updated: Feb 15, 2018** TensorFlow is a rapidly moving, community supported project. This document is intended to provide guidance about priorities and focus areas of the core set of TensorFlow @@ -25,7 +25,7 @@ expected in the next one to two releases. #### Keras API: * Better integration with tf.data (ability to call `model.fit` with data tensors) -* Full support for Eager execution (both Eager support for the regular Keras API, and ability +* Full support for Eager Execution (both Eager support for the regular Keras API, and ability to create Keras models Eager- style via Model subclassing) * Better distribution/multi-GPU support and TPU support (including a smoother model-to-estimator workflow) @@ -49,10 +49,10 @@ across image recognition, speech, object detection, and * Edward 2.0: High-level API for probabilistic programming ### Platforms -#### TFLite: -* Increased coverage of supported ops in TFLite -* Easier conversion of a trained TF graph for use on TFLite -* Support for GPU acceleration in TFLite (iOS and Andorid) +#### TensorFlow Lite: +* Increased coverage of supported ops in TensorFlow Lite +* Easier conversion of a trained TensorFlow graph for use on TensorFlow Lite +* Support for GPU acceleration in TensorFlow Lite (iOS and Android) * Support for hardware accelerators via Android NeuralNets API * Improved CPU performance by quantization and other network optimizations (eg. pruning, distillation) * Increased support for devices beyond Android and iOS (eg. RPi, Cortex-M) @@ -76,11 +76,11 @@ across image recognition, speech, object detection, and #### Special Interest Groups: * Mobilizing the community to work together in focused domains * [tf-distribute](https://groups.google.com/a/tensorflow.org/forum/#!forum/tf-distribute) -: build and packaging of TF +: build and packaging of TensorFlow * More to be identified and launched #### Community: * Incorporate public feedback on significant design decisions via a Request-for-Comment (RFC) process * Formalize process for external contributions to land in TensorFlow and associated projects -* Grow global TF communities and user groups +* Grow global TensorFlow communities and user groups * Collaborate with partners to co-develop and publish research papers -- GitLab From 9cd373548a71c13d4fa54f222547073dc9d2606c Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Thu, 15 Feb 2018 11:23:47 -0800 Subject: [PATCH 224/629] Linter fixes --- tensorflow/python/tools/freeze_graph.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py index 5d82aa6f47..d03a8bb977 100644 --- a/tensorflow/python/tools/freeze_graph.py +++ b/tensorflow/python/tools/freeze_graph.py @@ -109,7 +109,7 @@ def freeze_graph_with_def_protos(input_graph_def, input_meta_graph_def, clear_devices=True) restorer.restore(sess, input_checkpoint) if initializer_nodes: - sess.run(initializer_nodes.replace(' ','').split(",")) + sess.run(initializer_nodes.replace(' ', '').split(",")) elif input_saved_model_dir: if saved_model_tags is None: saved_model_tags = [] @@ -130,25 +130,27 @@ def freeze_graph_with_def_protos(input_graph_def, var_list=var_list, write_version=checkpoint_version) saver.restore(sess, input_checkpoint) if initializer_nodes: - sess.run(initializer_nodes.replace(' ','').split(",")) + sess.run(initializer_nodes.replace(' ', '').split(",")) - variable_names_whitelist = (variable_names_whitelist.replace(' ','').split(",") - if variable_names_whitelist else None) - variable_names_blacklist = (variable_names_blacklist.replace(' ','').split(",") - if variable_names_blacklist else None) + variable_names_whitelist = ( + variable_names_whitelist.replace(' ', '').split(",") + if variable_names_whitelist else None) + variable_names_blacklist = ( + variable_names_blacklist.replace(' ', '').split(",") + if variable_names_blacklist else None) if input_meta_graph_def: output_graph_def = graph_util.convert_variables_to_constants( sess, input_meta_graph_def.graph_def, - output_node_names.replace(' ','').split(","), + output_node_names.replace(' ', '').split(","), variable_names_whitelist=variable_names_whitelist, variable_names_blacklist=variable_names_blacklist) else: output_graph_def = graph_util.convert_variables_to_constants( sess, input_graph_def, - output_node_names.replace(' ','').split(","), + output_node_names.replace(' ', '').split(","), variable_names_whitelist=variable_names_whitelist, variable_names_blacklist=variable_names_blacklist) @@ -250,7 +252,7 @@ def freeze_graph(input_graph, variable_names_blacklist, input_meta_graph_def, input_saved_model_dir, - saved_model_tags.replace(' ','').split(","), + saved_model_tags.replace(' ', '').split(","), checkpoint_version=checkpoint_version) -- GitLab From 24a31d2b59feb1fde519ea36a2fa9cda8860ccf9 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Thu, 15 Feb 2018 11:27:23 -0800 Subject: [PATCH 225/629] Kill all the tabs --- tensorflow/python/ops/image_ops_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index e0dca9e407..d944b803f2 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -1121,8 +1121,8 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase): p_unknown_dims_4 = array_ops.placeholder( dtypes.uint8, shape=[None, None, None, None]) p_unknown_width = array_ops.placeholder(dtypes.uint8, shape=[64, None, 3]) - p_unknown_batch = array_ops.placeholder(dtypes.uint8, - shape=[None, 64, 64, 3]) + p_unknown_batch = array_ops.placeholder(dtypes.uint8, + shape=[None, 64, 64, 3]) p_wrong_rank = array_ops.placeholder(dtypes.uint8, shape=[None, None]) p_zero_dim = array_ops.placeholder(dtypes.uint8, shape=[64, 0, 3]) -- GitLab From 6c7e9a9a7062c6e68056192d547de3937dc73597 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Thu, 15 Feb 2018 11:31:44 -0800 Subject: [PATCH 226/629] Indentation --- tensorflow/python/tools/freeze_graph.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py index d03a8bb977..f7a578e52b 100644 --- a/tensorflow/python/tools/freeze_graph.py +++ b/tensorflow/python/tools/freeze_graph.py @@ -133,11 +133,11 @@ def freeze_graph_with_def_protos(input_graph_def, sess.run(initializer_nodes.replace(' ', '').split(",")) variable_names_whitelist = ( - variable_names_whitelist.replace(' ', '').split(",") - if variable_names_whitelist else None) + variable_names_whitelist.replace(' ', '').split(",") + if variable_names_whitelist else None) variable_names_blacklist = ( - variable_names_blacklist.replace(' ', '').split(",") - if variable_names_blacklist else None) + variable_names_blacklist.replace(' ', '').split(",") + if variable_names_blacklist else None) if input_meta_graph_def: output_graph_def = graph_util.convert_variables_to_constants( -- GitLab From 9d1d0253a2d634c0fd1f1db53b6e4922b8e92f28 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 15 Feb 2018 11:31:01 -0800 Subject: [PATCH 227/629] [HLOEval] Logical right shift returns 0 if shift amount exceeds bitwidth. PiperOrigin-RevId: 185871170 --- tensorflow/compiler/xla/service/hlo_evaluator.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index eb6e9feb7c..8016b38d15 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -794,6 +794,10 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { TF_ASSIGN_OR_RETURN( parent_->evaluated_[shr], ElementWiseBinaryOp(shr, [](NativeT lhs_elem, NativeT rhs_elem) { + // If shift amount is greater than the number of bits, then return 0. + if (rhs_elem >= sizeof(UnsignedT) * CHAR_BIT) { + return static_cast(0); + } return static_cast(static_cast(lhs_elem) >> rhs_elem); })); -- GitLab From 5dd585abb84c5d13af0017f78741e29505f7b5f7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 11:34:10 -0800 Subject: [PATCH 228/629] Make conversions from ShapedBuffer <-> ScopedShapedBuffer efficient by moving memory ownership instead of copying. PiperOrigin-RevId: 185871648 --- .../compiler/xla/client/local_client.cc | 4 +- .../compiler/xla/service/shaped_buffer.cc | 39 +++++++++++++++---- .../compiler/xla/service/shaped_buffer.h | 26 +++++++++---- tensorflow/compiler/xla/shape_tree.h | 12 ++++++ 4 files changed, 66 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/xla/client/local_client.cc b/tensorflow/compiler/xla/client/local_client.cc index ef98dbb640..91396f055f 100644 --- a/tensorflow/compiler/xla/client/local_client.cc +++ b/tensorflow/compiler/xla/client/local_client.cc @@ -172,7 +172,9 @@ StatusOr> LocalExecutable::Run( std::unique_ptr result, executable_->ExecuteOnStreamWrapper( &service_options, run_options.execution_profile(), arguments)); - return ScopedShapedBuffer::MakeScoped(result.get(), run_options.allocator()); + + return MakeUnique(std::move(*result), + run_options.allocator()); } StatusOr> LocalExecutable::ExecuteAndDump( diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc index c679d401c3..6e9986165f 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.cc +++ b/tensorflow/compiler/xla/service/shaped_buffer.cc @@ -41,7 +41,32 @@ ShapedBuffer::ShapedBuffer(const Shape& on_host_shape, on_device_shape_(on_device_shape), platform_(platform), device_ordinal_(device_ordinal), - buffers_(on_device_shape) {} + buffers_(&on_device_shape_) {} + +ShapedBuffer::ShapedBuffer(ShapedBuffer&& s) + : on_host_shape_(std::move(s.on_host_shape_)), + on_device_shape_(std::move(s.on_device_shape_)), + platform_(s.platform_), + device_ordinal_(s.device_ordinal_), + buffers_(std::move(s.buffers_)) { + // s.buffers_ has a pointer to s.on_device_shape_. When we move s.buffers_ + // into buffers_, we also need to update this pointer so that buffers_ doesn't + // point into s. + buffers_.replace_shape_ptr(&on_device_shape_); +} + +ShapedBuffer& ShapedBuffer::operator=(ShapedBuffer&& s) { + on_host_shape_ = std::move(s.on_host_shape_); + on_device_shape_ = std::move(s.on_device_shape_); + platform_ = s.platform_; + device_ordinal_ = s.device_ordinal_; + buffers_ = std::move(s.buffers_); + // buffers_ has a pointer to its on_device_shape_. When we move s.buffers_ + // into buffers_, we also need to update this pointer so that buffers_ doesn't + // point into s. + buffers_.replace_shape_ptr(&on_device_shape_); + return *this; +} void ShapedBuffer::clear() { for (auto& pair : buffers_) { @@ -99,6 +124,10 @@ ScopedShapedBuffer::ScopedShapedBuffer(const Shape& on_host_shape, device_ordinal), allocator_(allocator) {} +ScopedShapedBuffer::ScopedShapedBuffer(ShapedBuffer shaped_buffer, + DeviceMemoryAllocator* allocator) + : ShapedBuffer(std::move(shaped_buffer)), allocator_(allocator) {} + ScopedShapedBuffer::~ScopedShapedBuffer() { // Deallocate all non-null buffers. A buffer may appear in more than one spot // in the shape (eg, a tuple with a repeated element) so keep track of what @@ -116,12 +145,8 @@ ScopedShapedBuffer::~ScopedShapedBuffer() { } std::unique_ptr ScopedShapedBuffer::release() { - auto shaped_buffer = MakeUnique( - on_host_shape(), on_device_shape(), platform(), device_ordinal()); - - shaped_buffer->buffers() = buffers(); - clear(); - + auto shaped_buffer = MakeUnique(std::move(*this)); + buffers_ = ShapeTree(); return shaped_buffer; } diff --git a/tensorflow/compiler/xla/service/shaped_buffer.h b/tensorflow/compiler/xla/service/shaped_buffer.h index d397e47d2c..b816df8385 100644 --- a/tensorflow/compiler/xla/service/shaped_buffer.h +++ b/tensorflow/compiler/xla/service/shaped_buffer.h @@ -87,18 +87,24 @@ class ShapedBuffer { string ToString() const; + ShapedBuffer(ShapedBuffer&& s); + ShapedBuffer& operator=(ShapedBuffer&&); + protected: + ShapedBuffer(const ShapedBuffer&) = delete; + ShapedBuffer& operator=(const ShapedBuffer&) = delete; + // The shape of the data when represented on the host. - const Shape on_host_shape_; + Shape on_host_shape_; // The shape of the data on the device. - const Shape on_device_shape_; + Shape on_device_shape_; // The platform the memory is allocated on. const perftools::gputools::Platform* platform_; // The device the memory is allocated on. - const int device_ordinal_; + int device_ordinal_; // The tree of device buffers. Its shape is on_device_shape(). ShapeTree buffers_; @@ -121,14 +127,20 @@ class ScopedShapedBuffer : public ShapedBuffer { ScopedShapedBuffer(const Shape& on_host_shape, const Shape& on_device_shape, DeviceMemoryAllocator* allocator, int device_ordinal); + // Create a ScopedShapedBuffer by taking over the memory from the incoming + // ShapedBuffer. + ScopedShapedBuffer(ShapedBuffer shaped_buffer, + DeviceMemoryAllocator* allocator); + // Return the allocator used to allocate the device memory held in this // ScopedShapedBuffer. DeviceMemoryAllocator* memory_allocator() const { return allocator_; } - // Release all device memory owned by this ScopedShapedBuffer and return the - // device memory pointers in the form of a ShapedBuffer. Device memory - // pointers in this ScopedShapedBuffer object are set to null. This method is - // analogous to std::unique_ptr::release(). + // Release all device memory owned by this ScopedShapedBuffer and + // return the device memory pointers in the form of a + // ShapedBuffer. The returned ShapedBuffer takes over the memory + // from the ScopedShapedBuffer. The resulting ScopedShapedBuffer can + // only be destroyed. std::unique_ptr release(); // All buffers in the shape are deallocated on destruction. diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h index d752619bd6..280f02e886 100644 --- a/tensorflow/compiler/xla/shape_tree.h +++ b/tensorflow/compiler/xla/shape_tree.h @@ -143,6 +143,18 @@ class ShapeTree { // Return the shape represented with this ShapeTree. const Shape& shape() const { return *shape_; } + // Replaces *only* the underlying shape of this ShapeTree. The caller must own + // the Shape object and hence shape_storage_ is not updated. + // + // Only safe to use this if the ShapeTree was constructed with 'explicit + // ShapeTree(const Shape* shape)' or is moved from one such ShapeTree. The + // caller must ensure that the input shape is consistent with the underlying + // tree. + void replace_shape_ptr(const Shape* shape) { + CHECK(shape_storage_.get() == nullptr); + shape_ = shape; + } + // Returns true if the node at the given index is a leaf node (an array // shape). bool IsLeaf(const ShapeIndex& index) const { -- GitLab From 6d79acccfd6a4f0c2d1b59ceaa991fbf228b660a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 11:38:03 -0800 Subject: [PATCH 229/629] Implementation of tf.nn.top_k in TfLite PiperOrigin-RevId: 185872292 --- tensorflow/contrib/lite/kernels/BUILD | 14 + tensorflow/contrib/lite/kernels/register.cc | 2 + tensorflow/contrib/lite/kernels/topk_v2.cc | 232 + .../contrib/lite/kernels/topk_v2_test.cc | 155 + tensorflow/contrib/lite/model.cc | 1 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 5 + .../contrib/lite/schema/schema_generated.h | 5046 +++++++---------- tensorflow/contrib/lite/testing/BUILD | 1 + .../contrib/lite/testing/generate_examples.py | 37 +- .../contrib/lite/toco/export_tensorflow.cc | 14 + .../propagate_fixed_sizes.cc | 41 +- .../contrib/lite/toco/import_tensorflow.cc | 34 + tensorflow/contrib/lite/toco/model.h | 9 + .../contrib/lite/toco/tflite/operator.cc | 16 + .../contrib/lite/toco/tflite/operator_test.cc | 7 + tensorflow/contrib/lite/toco/tooling_util.cc | 1 + 17 files changed, 2537 insertions(+), 3079 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/topk_v2.cc create mode 100644 tensorflow/contrib/lite/kernels/topk_v2_test.cc diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 5d553def0a..d80c8bb671 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -133,6 +133,7 @@ cc_library( "strided_slice.cc", "sub.cc", "svdf.cc", + "topk_v2.cc", "transpose.cc", "unidirectional_sequence_lstm.cc", "unidirectional_sequence_rnn.cc", @@ -401,6 +402,19 @@ tf_cc_test( ], ) +tf_cc_test( + name = "topk_v2_test", + size = "small", + srcs = ["topk_v2_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:builtin_op_data", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "resize_bilinear_test", size = "small", diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 0f365078cd..fa870ddb40 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -61,6 +61,7 @@ TfLiteRegistration* Register_MEAN(); TfLiteRegistration* Register_SQUEEZE(); TfLiteRegistration* Register_STRIDED_SLICE(); TfLiteRegistration* Register_EXP(); +TfLiteRegistration* Register_TOPK_V2(); BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_RELU, Register_RELU()); @@ -110,6 +111,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE()); AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE()); AddBuiltin(BuiltinOperator_EXP, Register_EXP()); + AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2()); } TfLiteRegistration* BuiltinOpResolver::FindOp( diff --git a/tensorflow/contrib/lite/kernels/topk_v2.cc b/tensorflow/contrib/lite/kernels/topk_v2.cc new file mode 100644 index 0000000000..807e84609f --- /dev/null +++ b/tensorflow/contrib/lite/kernels/topk_v2.cc @@ -0,0 +1,232 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +namespace tflite { +namespace ops { +namespace builtin { +namespace topk_v2 { +constexpr int kInputTensor = 0; +constexpr int kInputTopK = 1; +constexpr int kOutputIndexes = 0; +constexpr int kOutputValues = 1; + +namespace { +TfLiteStatus ResizeOutput(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* top_k = GetInput(context, node, kInputTopK); + // INT32 number of top results is supported. + TF_LITE_ENSURE_EQ(context, top_k->type, kTfLiteInt32); + // Check that the tensor contains only one value. + TF_LITE_ENSURE_EQ(context, NumDimensions(top_k), 1); + TF_LITE_ENSURE_EQ(context, NumElements(top_k), 1); + const int32 k = top_k->data.i32[0]; + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + const int num_dimensions = NumDimensions(input); + // Check that input has one or more dimensions. + TF_LITE_ENSURE_MSG(context, input->dims->size >= 1, + "TopK k input must have 1 or more dimensions."); + // Check that k is less or equal the internal dimension. + TF_LITE_ENSURE_MSG(context, k <= input->dims->data[num_dimensions - 1], + "TopK k is higher than the internal dimension."); + + TfLiteIntArray* output_indexes_shape = TfLiteIntArrayCreate(num_dimensions); + TfLiteIntArray* output_values_shape = TfLiteIntArrayCreate(num_dimensions); + for (int i = 0; i < num_dimensions - 1; ++i) { + output_indexes_shape->data[i] = input->dims->data[i]; + output_values_shape->data[i] = input->dims->data[i]; + } + output_indexes_shape->data[num_dimensions - 1] = k; + output_values_shape->data[num_dimensions - 1] = k; + TfLiteTensor* output_indexes = GetOutput(context, node, kOutputIndexes); + TfLiteTensor* output_values = GetOutput(context, node, kOutputValues); + auto resize_tensor = [context](TfLiteTensor* tensor, TfLiteIntArray* new_size, + TfLiteIntArray* delete_on_error) { + TfLiteStatus status = context->ResizeTensor(context, tensor, new_size); + if (status != kTfLiteOk) { + TfLiteIntArrayFree(new_size); + if (delete_on_error != nullptr) { + TfLiteIntArrayFree(delete_on_error); + } + } + return status; + }; + TF_LITE_ENSURE_OK(context, resize_tensor(output_indexes, output_indexes_shape, + output_values_shape)); + TF_LITE_ENSURE_OK(context, + resize_tensor(output_values, output_values_shape, nullptr)); + return kTfLiteOk; +} + +// The class that collects top indexes of k values. Based on template +// tensorflow::gtl::TopN<> but, for optimization, +// it re-uses the same container. +template +class TopContainer { + public: + TopContainer() = delete; + TopContainer(int32 k, int32 row_size) : k_(k) { + container_.reserve(std::min(k, row_size) + 1); + } + + void start_collecting(const T* values) { + values_ = values; + container_.clear(); + } + void push(int32 a) { + auto comparator = [this](int32 a, int32 b) { return compare_fun(a, b); }; + if (container_.size() <= k_) { + container_.push_back(a); + if (container_.size() == k_ + 1) { + std::make_heap(container_.begin(), container_.end(), comparator); + std::pop_heap(container_.begin(), container_.end(), comparator); + } + } else if (comparator(a, container_.front())) { + container_.back() = a; + std::push_heap(container_.begin(), container_.end(), comparator); + std::pop_heap(container_.begin(), container_.end(), comparator); + } + } + + const std::vector& sorted_result() { + auto comparator = [this](int32 a, int32 b) { return compare_fun(a, b); }; + if (container_.size() <= k_) { + std::sort(container_.begin(), container_.end(), comparator); + } else { + std::sort_heap(container_.begin(), container_.end() - 1, comparator); + container_.resize(k_); + } + return container_; + } + + private: + int32 k_; + std::vector container_; + const T* values_ = nullptr; + + bool compare_fun(int32 a, int32 b) const { + if (values_[b] < values_[a]) { + return true; + } else if (values_[b] > values_[a]) { + return false; + } else { + return a < b; + } + } +}; + +// Mostly modeled on tensorflow/core/kernels/topk_op.cc for CPU. +template +void TopK(int32 row_size, int32 num_rows, const T* data, int32 k, + int32* output_indexes, T* output_values) { + TopContainer topc(k, row_size); + for (int row = 0; row < num_rows; ++row) { + const T* values_row = data + row * row_size; + topc.start_collecting(values_row); + for (int32 c = 0; c < row_size; ++c) { + topc.push(c); + } + + // Prepare output buffers. + int32* indexes_row = output_indexes + row * k; + T* output_row = output_values + row * k; + // We always assume that the output is sorted. + const auto& top_k = topc.sorted_result(); + std::copy(top_k.begin(), top_k.end(), indexes_row); + std::transform(top_k.begin(), top_k.end(), output_row, + [values_row](const int32 loc) { return values_row[loc]; }); + } +} + +} // namespace + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + // Check that the inputs and outputs have the right sizes and types. + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 2); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output_values = GetOutput(context, node, kOutputValues); + TF_LITE_ENSURE_EQ(context, input->type, output_values->type); + + TfLiteTensor* top_k = GetInput(context, node, kInputTopK); + TF_LITE_ENSURE_EQ(context, top_k->type, kTfLiteInt32); + + // Set output dynamic if the input is not const. + if (IsConstantTensor(top_k)) { + TF_LITE_ENSURE_OK(context, ResizeOutput(context, node)); + } else { + TfLiteTensor* output_indexes = GetOutput(context, node, kOutputIndexes); + TfLiteTensor* output_values = GetOutput(context, node, kOutputValues); + SetTensorToDynamic(output_indexes); + SetTensorToDynamic(output_values); + } + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* output_values = GetOutput(context, node, kOutputValues); + TfLiteTensor* output_indexes = GetOutput(context, node, kOutputIndexes); + if (IsDynamicTensor(output_values)) { + TF_LITE_ENSURE_OK(context, ResizeOutput(context, node)); + } + TfLiteTensor* top_k = GetInput(context, node, kInputTopK); + const int32 k = top_k->data.i32[0]; + // The tensor can have more than 2 dimensions or even be a vector, the code + // anyway calls the internal dimension as row; + TfLiteTensor* input = GetInput(context, node, kInputTensor); + const int32 row_size = input->dims->data[input->dims->size - 1]; + int32 num_rows = 1; + for (int i = 0; i < input->dims->size - 1; ++i) { + num_rows *= input->dims->data[i]; + } + switch (output_values->type) { + case kTfLiteFloat32: + TopK(row_size, num_rows, input->data.f, k, output_indexes->data.i32, + output_values->data.f); + break; + case kTfLiteUInt8: + TopK(row_size, num_rows, input->data.uint8, k, output_indexes->data.i32, + output_values->data.uint8); + break; + case kTfLiteInt32: + TopK(row_size, num_rows, input->data.i32, k, output_indexes->data.i32, + output_values->data.i32); + break; + case kTfLiteInt64: + TopK(row_size, num_rows, input->data.i64, k, output_indexes->data.i32, + output_values->data.i64); + break; + default: + context->ReportError(context, "Type is currently not supported by TopK."); + return kTfLiteError; + } + + return kTfLiteOk; +} +} // namespace topk_v2 +TfLiteRegistration* Register_TOPK_V2() { + static TfLiteRegistration r = {nullptr, nullptr, topk_v2::Prepare, + topk_v2::Eval}; + return &r; +} +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/topk_v2_test.cc b/tensorflow/contrib/lite/kernels/topk_v2_test.cc new file mode 100644 index 0000000000..29f2a057cd --- /dev/null +++ b/tensorflow/contrib/lite/kernels/topk_v2_test.cc @@ -0,0 +1,155 @@ + +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class TopKV2OpModel : public SingleOpModel { + public: + TopKV2OpModel(std::initializer_list input_shape, TensorType input_type, + int top_k) { + input_ = AddInput(input_type); + top_k_ = AddInput(TensorType_INT32); + output_indexes_ = AddOutput(TensorType_INT32); + output_values_ = AddOutput(input_type); + SetBuiltinOp(BuiltinOperator_TOPK_V2, BuiltinOptions_TopKV2Options, 0); + BuildInterpreter({input_shape, {1}}); + PopulateTensor(top_k_, {top_k}); + } + + void SetInputFloat(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetInputUInt8(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetInputInt32(std::initializer_list data) { + PopulateTensor(input_, data); + } + + void SetInputInt64(std::initializer_list data) { + PopulateTensor(input_, data); + } + + std::vector GetIndexes() { + return ExtractVector(output_indexes_); + } + + std::vector GetValuesFloat() { + return ExtractVector(output_values_); + } + + std::vector GetValuesUInt8() { + return ExtractVector(output_values_); + } + + std::vector GetValuesInt32() { + return ExtractVector(output_values_); + } + + std::vector GetValuesInt64() { + return ExtractVector(output_values_); + } + + protected: + int input_; + int top_k_; + int output_indexes_; + int output_values_; +}; + +// The test where the tensor dimension is equal to top. +TEST(TopKV2OpTest, EqualFloat) { + TopKV2OpModel m({2, 2}, TensorType_FLOAT32, 2); + m.SetInputFloat({-2.0, 0.2, 0.8, 0.1}); + m.Invoke(); + EXPECT_THAT(m.GetIndexes(), ElementsAreArray({1, 0, 0, 1})); + EXPECT_THAT(m.GetValuesFloat(), + ElementsAreArray(ArrayFloatNear({0.2, -2.0, 0.8, 0.1}))); +} + +// Test when internal dimension is k+1. +TEST(TopKV2OpTest, BorderFloat) { + TopKV2OpModel m({2, 3}, TensorType_FLOAT32, 2); + m.SetInputFloat({-2.0, -3.0, 0.2, 0.8, 0.1, -0.1}); + m.Invoke(); + EXPECT_THAT(m.GetIndexes(), ElementsAreArray({2, 0, 0, 1})); + EXPECT_THAT(m.GetValuesFloat(), + ElementsAreArray(ArrayFloatNear({0.2, -2.0, 0.8, 0.1}))); +} +// Test when internal dimension is higher than k. +TEST(TopKV2OpTest, LargeFloat) { + TopKV2OpModel m({2, 4}, TensorType_FLOAT32, 2); + m.SetInputFloat({-2.0, -3.0, -4.0, 0.2, 0.8, 0.1, -0.1, -0.8}); + m.Invoke(); + EXPECT_THAT(m.GetIndexes(), ElementsAreArray({3, 0, 0, 1})); + EXPECT_THAT(m.GetValuesFloat(), + ElementsAreArray(ArrayFloatNear({0.2, -2.0, 0.8, 0.1}))); +} + +// Test 1D case. +TEST(TopKV2OpTest, VectorFloat) { + TopKV2OpModel m({8}, TensorType_FLOAT32, 2); + m.SetInputFloat({-2.0, -3.0, -4.0, 0.2, 0.8, 0.1, -0.1, -0.8}); + m.Invoke(); + EXPECT_THAT(m.GetIndexes(), ElementsAreArray({4, 3})); + EXPECT_THAT(m.GetValuesFloat(), ElementsAreArray(ArrayFloatNear({0.8, 0.2}))); +} + +// Check that uint8 works. +TEST(TopKV2OpTest, TypeUint8) { + TopKV2OpModel m({2, 3}, TensorType_UINT8, 2); + m.SetInputUInt8({1, 2, 3, 251, 250, 249}); + m.Invoke(); + EXPECT_THAT(m.GetIndexes(), ElementsAreArray({2, 1, 0, 1})); + EXPECT_THAT(m.GetValuesUInt8(), ElementsAreArray({3, 2, 251, 250})); +} + +// Check that int32 works. +TEST(TopKV2OpTest, TypeInt32) { + TopKV2OpModel m({2, 3}, TensorType_INT32, 2); + m.SetInputInt32({1, 2, 3, 10251, 10250, 10249}); + m.Invoke(); + EXPECT_THAT(m.GetIndexes(), ElementsAreArray({2, 1, 0, 1})); + EXPECT_THAT(m.GetValuesInt32(), ElementsAreArray({3, 2, 10251, 10250})); +} + +// Check that int64 works. +TEST(TopKV2OpTest, TypeInt64) { + TopKV2OpModel m({2, 3}, TensorType_INT64, 2); + m.SetInputInt64({1, 2, 3, -1, -2, -3}); + m.Invoke(); + EXPECT_THAT(m.GetIndexes(), ElementsAreArray({2, 1, 0, 1})); + EXPECT_THAT(m.GetValuesInt64(), ElementsAreArray({3, 2, -1, -2})); +} +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 2ee0cac11c..92922a1460 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -279,6 +279,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_RELU6: case BuiltinOperator_CONCAT_EMBEDDINGS: case BuiltinOperator_EXP: + case BuiltinOperator_TOPK_V2: break; case BuiltinOperator_LSH_PROJECTION: { TfLiteLSHProjectionParams* params = diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index 77084b4dc8..a83349d95f 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -335,6 +335,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_GATHER: case tflite::BuiltinOperator_SPACE_TO_BATCH_ND: case tflite::BuiltinOperator_BATCH_TO_SPACE_ND: + case tflite::BuiltinOperator_TOPK_V2: case tflite::BuiltinOperator_TRANSPOSE: case tflite::BuiltinOperator_MEAN: case tflite::BuiltinOperator_DIV: diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index ef8f39cf55..7ec19a0612 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -121,6 +121,7 @@ enum BuiltinOperator : byte { STRIDED_SLICE = 45, BIDIRECTIONAL_SEQUENCE_RNN = 46, EXP = 47, + TOPK_V2=48, } // Options for the builtin operators. @@ -158,6 +159,7 @@ union BuiltinOptions { SequenceRNNOptions, StridedSliceOptions, ExpOptions, + TopKV2Options, } enum Padding : byte { SAME, VALID } @@ -317,6 +319,9 @@ table DivOptions { fused_activation_function:ActivationFunctionType; } +table TopKV2Options { +} + enum CombinerType : byte { SUM = 0, MEAN = 1, diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 40b50bab45..16cda10c51 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ // automatically generated by the FlatBuffers compiler, do not modify + #ifndef FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ #define FLATBUFFERS_GENERATED_SCHEMA_TFLITE_H_ @@ -108,6 +109,9 @@ struct SubOptionsT; struct DivOptions; struct DivOptionsT; +struct TopKV2Options; +struct TopKV2OptionsT; + struct EmbeddingLookupSparseOptions; struct EmbeddingLookupSparseOptionsT; @@ -156,15 +160,27 @@ enum TensorType { }; inline TensorType (&EnumValuesTensorType())[6] { - static TensorType values[] = {TensorType_FLOAT32, TensorType_FLOAT16, - TensorType_INT32, TensorType_UINT8, - TensorType_INT64, TensorType_STRING}; + static TensorType values[] = { + TensorType_FLOAT32, + TensorType_FLOAT16, + TensorType_INT32, + TensorType_UINT8, + TensorType_INT64, + TensorType_STRING + }; return values; } inline const char **EnumNamesTensorType() { - static const char *names[] = {"FLOAT32", "FLOAT16", "INT32", "UINT8", - "INT64", "STRING", nullptr}; + static const char *names[] = { + "FLOAT32", + "FLOAT16", + "INT32", + "UINT8", + "INT64", + "STRING", + nullptr + }; return names; } @@ -219,110 +235,116 @@ enum BuiltinOperator { BuiltinOperator_STRIDED_SLICE = 45, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46, BuiltinOperator_EXP = 47, + BuiltinOperator_TOPK_V2 = 48, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_EXP + BuiltinOperator_MAX = BuiltinOperator_TOPK_V2 }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[45] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[46] { static BuiltinOperator values[] = { - BuiltinOperator_ADD, - BuiltinOperator_AVERAGE_POOL_2D, - BuiltinOperator_CONCATENATION, - BuiltinOperator_CONV_2D, - BuiltinOperator_DEPTHWISE_CONV_2D, - BuiltinOperator_EMBEDDING_LOOKUP, - BuiltinOperator_FULLY_CONNECTED, - BuiltinOperator_HASHTABLE_LOOKUP, - BuiltinOperator_L2_NORMALIZATION, - BuiltinOperator_L2_POOL_2D, - BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, - BuiltinOperator_LOGISTIC, - BuiltinOperator_LSH_PROJECTION, - BuiltinOperator_LSTM, - BuiltinOperator_MAX_POOL_2D, - BuiltinOperator_MUL, - BuiltinOperator_RELU, - BuiltinOperator_RELU_N1_TO_1, - BuiltinOperator_RELU6, - BuiltinOperator_RESHAPE, - BuiltinOperator_RESIZE_BILINEAR, - BuiltinOperator_RNN, - BuiltinOperator_SOFTMAX, - BuiltinOperator_SPACE_TO_DEPTH, - BuiltinOperator_SVDF, - BuiltinOperator_TANH, - BuiltinOperator_CONCAT_EMBEDDINGS, - BuiltinOperator_SKIP_GRAM, - BuiltinOperator_CALL, - BuiltinOperator_CUSTOM, - BuiltinOperator_EMBEDDING_LOOKUP_SPARSE, - BuiltinOperator_PAD, - BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN, - BuiltinOperator_GATHER, - BuiltinOperator_BATCH_TO_SPACE_ND, - BuiltinOperator_SPACE_TO_BATCH_ND, - BuiltinOperator_TRANSPOSE, - BuiltinOperator_MEAN, - BuiltinOperator_SUB, - BuiltinOperator_DIV, - BuiltinOperator_SQUEEZE, - BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, - BuiltinOperator_STRIDED_SLICE, - BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, - BuiltinOperator_EXP}; + BuiltinOperator_ADD, + BuiltinOperator_AVERAGE_POOL_2D, + BuiltinOperator_CONCATENATION, + BuiltinOperator_CONV_2D, + BuiltinOperator_DEPTHWISE_CONV_2D, + BuiltinOperator_EMBEDDING_LOOKUP, + BuiltinOperator_FULLY_CONNECTED, + BuiltinOperator_HASHTABLE_LOOKUP, + BuiltinOperator_L2_NORMALIZATION, + BuiltinOperator_L2_POOL_2D, + BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION, + BuiltinOperator_LOGISTIC, + BuiltinOperator_LSH_PROJECTION, + BuiltinOperator_LSTM, + BuiltinOperator_MAX_POOL_2D, + BuiltinOperator_MUL, + BuiltinOperator_RELU, + BuiltinOperator_RELU_N1_TO_1, + BuiltinOperator_RELU6, + BuiltinOperator_RESHAPE, + BuiltinOperator_RESIZE_BILINEAR, + BuiltinOperator_RNN, + BuiltinOperator_SOFTMAX, + BuiltinOperator_SPACE_TO_DEPTH, + BuiltinOperator_SVDF, + BuiltinOperator_TANH, + BuiltinOperator_CONCAT_EMBEDDINGS, + BuiltinOperator_SKIP_GRAM, + BuiltinOperator_CALL, + BuiltinOperator_CUSTOM, + BuiltinOperator_EMBEDDING_LOOKUP_SPARSE, + BuiltinOperator_PAD, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOperator_GATHER, + BuiltinOperator_BATCH_TO_SPACE_ND, + BuiltinOperator_SPACE_TO_BATCH_ND, + BuiltinOperator_TRANSPOSE, + BuiltinOperator_MEAN, + BuiltinOperator_SUB, + BuiltinOperator_DIV, + BuiltinOperator_SQUEEZE, + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOperator_STRIDED_SLICE, + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOperator_EXP, + BuiltinOperator_TOPK_V2 + }; return values; } inline const char **EnumNamesBuiltinOperator() { - static const char *names[] = {"ADD", - "AVERAGE_POOL_2D", - "CONCATENATION", - "CONV_2D", - "DEPTHWISE_CONV_2D", - "", - "", - "EMBEDDING_LOOKUP", - "", - "FULLY_CONNECTED", - "HASHTABLE_LOOKUP", - "L2_NORMALIZATION", - "L2_POOL_2D", - "LOCAL_RESPONSE_NORMALIZATION", - "LOGISTIC", - "LSH_PROJECTION", - "LSTM", - "MAX_POOL_2D", - "MUL", - "RELU", - "RELU_N1_TO_1", - "RELU6", - "RESHAPE", - "RESIZE_BILINEAR", - "RNN", - "SOFTMAX", - "SPACE_TO_DEPTH", - "SVDF", - "TANH", - "CONCAT_EMBEDDINGS", - "SKIP_GRAM", - "CALL", - "CUSTOM", - "EMBEDDING_LOOKUP_SPARSE", - "PAD", - "UNIDIRECTIONAL_SEQUENCE_RNN", - "GATHER", - "BATCH_TO_SPACE_ND", - "SPACE_TO_BATCH_ND", - "TRANSPOSE", - "MEAN", - "SUB", - "DIV", - "SQUEEZE", - "UNIDIRECTIONAL_SEQUENCE_LSTM", - "STRIDED_SLICE", - "BIDIRECTIONAL_SEQUENCE_RNN", - "EXP", - nullptr}; + static const char *names[] = { + "ADD", + "AVERAGE_POOL_2D", + "CONCATENATION", + "CONV_2D", + "DEPTHWISE_CONV_2D", + "", + "", + "EMBEDDING_LOOKUP", + "", + "FULLY_CONNECTED", + "HASHTABLE_LOOKUP", + "L2_NORMALIZATION", + "L2_POOL_2D", + "LOCAL_RESPONSE_NORMALIZATION", + "LOGISTIC", + "LSH_PROJECTION", + "LSTM", + "MAX_POOL_2D", + "MUL", + "RELU", + "RELU_N1_TO_1", + "RELU6", + "RESHAPE", + "RESIZE_BILINEAR", + "RNN", + "SOFTMAX", + "SPACE_TO_DEPTH", + "SVDF", + "TANH", + "CONCAT_EMBEDDINGS", + "SKIP_GRAM", + "CALL", + "CUSTOM", + "EMBEDDING_LOOKUP_SPARSE", + "PAD", + "UNIDIRECTIONAL_SEQUENCE_RNN", + "GATHER", + "BATCH_TO_SPACE_ND", + "SPACE_TO_BATCH_ND", + "TRANSPOSE", + "MEAN", + "SUB", + "DIV", + "SQUEEZE", + "UNIDIRECTIONAL_SEQUENCE_LSTM", + "STRIDED_SLICE", + "BIDIRECTIONAL_SEQUENCE_RNN", + "EXP", + "TOPK_V2", + nullptr + }; return names; } @@ -366,85 +388,91 @@ enum BuiltinOptions { BuiltinOptions_SequenceRNNOptions = 31, BuiltinOptions_StridedSliceOptions = 32, BuiltinOptions_ExpOptions = 33, + BuiltinOptions_TopKV2Options = 34, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_ExpOptions + BuiltinOptions_MAX = BuiltinOptions_TopKV2Options }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[34] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[35] { static BuiltinOptions values[] = { - BuiltinOptions_NONE, - BuiltinOptions_Conv2DOptions, - BuiltinOptions_DepthwiseConv2DOptions, - BuiltinOptions_ConcatEmbeddingsOptions, - BuiltinOptions_LSHProjectionOptions, - BuiltinOptions_Pool2DOptions, - BuiltinOptions_SVDFOptions, - BuiltinOptions_RNNOptions, - BuiltinOptions_FullyConnectedOptions, - BuiltinOptions_SoftmaxOptions, - BuiltinOptions_ConcatenationOptions, - BuiltinOptions_AddOptions, - BuiltinOptions_L2NormOptions, - BuiltinOptions_LocalResponseNormalizationOptions, - BuiltinOptions_LSTMOptions, - BuiltinOptions_ResizeBilinearOptions, - BuiltinOptions_CallOptions, - BuiltinOptions_ReshapeOptions, - BuiltinOptions_SkipGramOptions, - BuiltinOptions_SpaceToDepthOptions, - BuiltinOptions_EmbeddingLookupSparseOptions, - BuiltinOptions_MulOptions, - BuiltinOptions_PadOptions, - BuiltinOptions_GatherOptions, - BuiltinOptions_BatchToSpaceNDOptions, - BuiltinOptions_SpaceToBatchNDOptions, - BuiltinOptions_TransposeOptions, - BuiltinOptions_MeanOptions, - BuiltinOptions_SubOptions, - BuiltinOptions_DivOptions, - BuiltinOptions_SqueezeOptions, - BuiltinOptions_SequenceRNNOptions, - BuiltinOptions_StridedSliceOptions, - BuiltinOptions_ExpOptions}; + BuiltinOptions_NONE, + BuiltinOptions_Conv2DOptions, + BuiltinOptions_DepthwiseConv2DOptions, + BuiltinOptions_ConcatEmbeddingsOptions, + BuiltinOptions_LSHProjectionOptions, + BuiltinOptions_Pool2DOptions, + BuiltinOptions_SVDFOptions, + BuiltinOptions_RNNOptions, + BuiltinOptions_FullyConnectedOptions, + BuiltinOptions_SoftmaxOptions, + BuiltinOptions_ConcatenationOptions, + BuiltinOptions_AddOptions, + BuiltinOptions_L2NormOptions, + BuiltinOptions_LocalResponseNormalizationOptions, + BuiltinOptions_LSTMOptions, + BuiltinOptions_ResizeBilinearOptions, + BuiltinOptions_CallOptions, + BuiltinOptions_ReshapeOptions, + BuiltinOptions_SkipGramOptions, + BuiltinOptions_SpaceToDepthOptions, + BuiltinOptions_EmbeddingLookupSparseOptions, + BuiltinOptions_MulOptions, + BuiltinOptions_PadOptions, + BuiltinOptions_GatherOptions, + BuiltinOptions_BatchToSpaceNDOptions, + BuiltinOptions_SpaceToBatchNDOptions, + BuiltinOptions_TransposeOptions, + BuiltinOptions_MeanOptions, + BuiltinOptions_SubOptions, + BuiltinOptions_DivOptions, + BuiltinOptions_SqueezeOptions, + BuiltinOptions_SequenceRNNOptions, + BuiltinOptions_StridedSliceOptions, + BuiltinOptions_ExpOptions, + BuiltinOptions_TopKV2Options + }; return values; } inline const char **EnumNamesBuiltinOptions() { - static const char *names[] = {"NONE", - "Conv2DOptions", - "DepthwiseConv2DOptions", - "ConcatEmbeddingsOptions", - "LSHProjectionOptions", - "Pool2DOptions", - "SVDFOptions", - "RNNOptions", - "FullyConnectedOptions", - "SoftmaxOptions", - "ConcatenationOptions", - "AddOptions", - "L2NormOptions", - "LocalResponseNormalizationOptions", - "LSTMOptions", - "ResizeBilinearOptions", - "CallOptions", - "ReshapeOptions", - "SkipGramOptions", - "SpaceToDepthOptions", - "EmbeddingLookupSparseOptions", - "MulOptions", - "PadOptions", - "GatherOptions", - "BatchToSpaceNDOptions", - "SpaceToBatchNDOptions", - "TransposeOptions", - "MeanOptions", - "SubOptions", - "DivOptions", - "SqueezeOptions", - "SequenceRNNOptions", - "StridedSliceOptions", - "ExpOptions", - nullptr}; + static const char *names[] = { + "NONE", + "Conv2DOptions", + "DepthwiseConv2DOptions", + "ConcatEmbeddingsOptions", + "LSHProjectionOptions", + "Pool2DOptions", + "SVDFOptions", + "RNNOptions", + "FullyConnectedOptions", + "SoftmaxOptions", + "ConcatenationOptions", + "AddOptions", + "L2NormOptions", + "LocalResponseNormalizationOptions", + "LSTMOptions", + "ResizeBilinearOptions", + "CallOptions", + "ReshapeOptions", + "SkipGramOptions", + "SpaceToDepthOptions", + "EmbeddingLookupSparseOptions", + "MulOptions", + "PadOptions", + "GatherOptions", + "BatchToSpaceNDOptions", + "SpaceToBatchNDOptions", + "TransposeOptions", + "MeanOptions", + "SubOptions", + "DivOptions", + "SqueezeOptions", + "SequenceRNNOptions", + "StridedSliceOptions", + "ExpOptions", + "TopKV2Options", + nullptr + }; return names; } @@ -453,211 +481,166 @@ inline const char *EnumNameBuiltinOptions(BuiltinOptions e) { return EnumNamesBuiltinOptions()[index]; } -template -struct BuiltinOptionsTraits { +template struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_NONE; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_Conv2DOptions; }; -template <> -struct BuiltinOptionsTraits { - static const BuiltinOptions enum_value = - BuiltinOptions_DepthwiseConv2DOptions; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DepthwiseConv2DOptions; }; -template <> -struct BuiltinOptionsTraits { - static const BuiltinOptions enum_value = - BuiltinOptions_ConcatEmbeddingsOptions; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_ConcatEmbeddingsOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_LSHProjectionOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_Pool2DOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SVDFOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_RNNOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_FullyConnectedOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SoftmaxOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_ConcatenationOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_AddOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_L2NormOptions; }; -template <> -struct BuiltinOptionsTraits { - static const BuiltinOptions enum_value = - BuiltinOptions_LocalResponseNormalizationOptions; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_LocalResponseNormalizationOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_LSTMOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_ResizeBilinearOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_CallOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_ReshapeOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SkipGramOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SpaceToDepthOptions; }; -template <> -struct BuiltinOptionsTraits { - static const BuiltinOptions enum_value = - BuiltinOptions_EmbeddingLookupSparseOptions; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_EmbeddingLookupSparseOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_MulOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_PadOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_GatherOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_BatchToSpaceNDOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SpaceToBatchNDOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_TransposeOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_MeanOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SubOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_DivOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SqueezeOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_SequenceRNNOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_StridedSliceOptions; }; -template <> -struct BuiltinOptionsTraits { +template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_ExpOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; BuiltinOptionsUnion() : type(BuiltinOptions_NONE), value(nullptr) {} - BuiltinOptionsUnion(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT - : type(BuiltinOptions_NONE), - value(nullptr) { - std::swap(type, u.type); - std::swap(value, u.value); - } + BuiltinOptionsUnion(BuiltinOptionsUnion&& u) FLATBUFFERS_NOEXCEPT : + type(BuiltinOptions_NONE), value(nullptr) + { std::swap(type, u.type); std::swap(value, u.value); } BuiltinOptionsUnion(const BuiltinOptionsUnion &) FLATBUFFERS_NOEXCEPT; - BuiltinOptionsUnion &operator=(const BuiltinOptionsUnion &u) - FLATBUFFERS_NOEXCEPT { - BuiltinOptionsUnion t(u); - std::swap(type, t.type); - std::swap(value, t.value); - return *this; - } - BuiltinOptionsUnion &operator=(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT { - std::swap(type, u.type); - std::swap(value, u.value); - return *this; - } + BuiltinOptionsUnion &operator=(const BuiltinOptionsUnion &u) FLATBUFFERS_NOEXCEPT + { BuiltinOptionsUnion t(u); std::swap(type, t.type); std::swap(value, t.value); return *this; } + BuiltinOptionsUnion &operator=(BuiltinOptionsUnion &&u) FLATBUFFERS_NOEXCEPT + { std::swap(type, u.type); std::swap(value, u.value); return *this; } ~BuiltinOptionsUnion() { Reset(); } void Reset(); #ifndef FLATBUFFERS_CPP98_STL template - void Set(T &&val) { + void Set(T&& val) { Reset(); type = BuiltinOptionsTraits::enum_value; if (type != BuiltinOptions_NONE) { @@ -666,352 +649,285 @@ struct BuiltinOptionsUnion { } #endif // FLATBUFFERS_CPP98_STL - static void *UnPack(const void *obj, BuiltinOptions type, - const flatbuffers::resolver_function_t *resolver); - flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, - const flatbuffers::rehasher_function_t *_rehasher = nullptr) const; + static void *UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver); + flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher = nullptr) const; Conv2DOptionsT *AsConv2DOptions() { - return type == BuiltinOptions_Conv2DOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_Conv2DOptions ? + reinterpret_cast(value) : nullptr; } const Conv2DOptionsT *AsConv2DOptions() const { - return type == BuiltinOptions_Conv2DOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_Conv2DOptions ? + reinterpret_cast(value) : nullptr; } DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() { - return type == BuiltinOptions_DepthwiseConv2DOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_DepthwiseConv2DOptions ? + reinterpret_cast(value) : nullptr; } const DepthwiseConv2DOptionsT *AsDepthwiseConv2DOptions() const { - return type == BuiltinOptions_DepthwiseConv2DOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_DepthwiseConv2DOptions ? + reinterpret_cast(value) : nullptr; } ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() { - return type == BuiltinOptions_ConcatEmbeddingsOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_ConcatEmbeddingsOptions ? + reinterpret_cast(value) : nullptr; } const ConcatEmbeddingsOptionsT *AsConcatEmbeddingsOptions() const { - return type == BuiltinOptions_ConcatEmbeddingsOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_ConcatEmbeddingsOptions ? + reinterpret_cast(value) : nullptr; } LSHProjectionOptionsT *AsLSHProjectionOptions() { - return type == BuiltinOptions_LSHProjectionOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_LSHProjectionOptions ? + reinterpret_cast(value) : nullptr; } const LSHProjectionOptionsT *AsLSHProjectionOptions() const { - return type == BuiltinOptions_LSHProjectionOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_LSHProjectionOptions ? + reinterpret_cast(value) : nullptr; } Pool2DOptionsT *AsPool2DOptions() { - return type == BuiltinOptions_Pool2DOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_Pool2DOptions ? + reinterpret_cast(value) : nullptr; } const Pool2DOptionsT *AsPool2DOptions() const { - return type == BuiltinOptions_Pool2DOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_Pool2DOptions ? + reinterpret_cast(value) : nullptr; } SVDFOptionsT *AsSVDFOptions() { - return type == BuiltinOptions_SVDFOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SVDFOptions ? + reinterpret_cast(value) : nullptr; } const SVDFOptionsT *AsSVDFOptions() const { - return type == BuiltinOptions_SVDFOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SVDFOptions ? + reinterpret_cast(value) : nullptr; } RNNOptionsT *AsRNNOptions() { - return type == BuiltinOptions_RNNOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_RNNOptions ? + reinterpret_cast(value) : nullptr; } const RNNOptionsT *AsRNNOptions() const { - return type == BuiltinOptions_RNNOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_RNNOptions ? + reinterpret_cast(value) : nullptr; } FullyConnectedOptionsT *AsFullyConnectedOptions() { - return type == BuiltinOptions_FullyConnectedOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_FullyConnectedOptions ? + reinterpret_cast(value) : nullptr; } const FullyConnectedOptionsT *AsFullyConnectedOptions() const { - return type == BuiltinOptions_FullyConnectedOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_FullyConnectedOptions ? + reinterpret_cast(value) : nullptr; } SoftmaxOptionsT *AsSoftmaxOptions() { - return type == BuiltinOptions_SoftmaxOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SoftmaxOptions ? + reinterpret_cast(value) : nullptr; } const SoftmaxOptionsT *AsSoftmaxOptions() const { - return type == BuiltinOptions_SoftmaxOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SoftmaxOptions ? + reinterpret_cast(value) : nullptr; } ConcatenationOptionsT *AsConcatenationOptions() { - return type == BuiltinOptions_ConcatenationOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_ConcatenationOptions ? + reinterpret_cast(value) : nullptr; } const ConcatenationOptionsT *AsConcatenationOptions() const { - return type == BuiltinOptions_ConcatenationOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_ConcatenationOptions ? + reinterpret_cast(value) : nullptr; } AddOptionsT *AsAddOptions() { - return type == BuiltinOptions_AddOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_AddOptions ? + reinterpret_cast(value) : nullptr; } const AddOptionsT *AsAddOptions() const { - return type == BuiltinOptions_AddOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_AddOptions ? + reinterpret_cast(value) : nullptr; } L2NormOptionsT *AsL2NormOptions() { - return type == BuiltinOptions_L2NormOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_L2NormOptions ? + reinterpret_cast(value) : nullptr; } const L2NormOptionsT *AsL2NormOptions() const { - return type == BuiltinOptions_L2NormOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_L2NormOptions ? + reinterpret_cast(value) : nullptr; } LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() { - return type == BuiltinOptions_LocalResponseNormalizationOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_LocalResponseNormalizationOptions ? + reinterpret_cast(value) : nullptr; } - const LocalResponseNormalizationOptionsT * - AsLocalResponseNormalizationOptions() const { - return type == BuiltinOptions_LocalResponseNormalizationOptions - ? reinterpret_cast( - value) - : nullptr; + const LocalResponseNormalizationOptionsT *AsLocalResponseNormalizationOptions() const { + return type == BuiltinOptions_LocalResponseNormalizationOptions ? + reinterpret_cast(value) : nullptr; } LSTMOptionsT *AsLSTMOptions() { - return type == BuiltinOptions_LSTMOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_LSTMOptions ? + reinterpret_cast(value) : nullptr; } const LSTMOptionsT *AsLSTMOptions() const { - return type == BuiltinOptions_LSTMOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_LSTMOptions ? + reinterpret_cast(value) : nullptr; } ResizeBilinearOptionsT *AsResizeBilinearOptions() { - return type == BuiltinOptions_ResizeBilinearOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_ResizeBilinearOptions ? + reinterpret_cast(value) : nullptr; } const ResizeBilinearOptionsT *AsResizeBilinearOptions() const { - return type == BuiltinOptions_ResizeBilinearOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_ResizeBilinearOptions ? + reinterpret_cast(value) : nullptr; } CallOptionsT *AsCallOptions() { - return type == BuiltinOptions_CallOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_CallOptions ? + reinterpret_cast(value) : nullptr; } const CallOptionsT *AsCallOptions() const { - return type == BuiltinOptions_CallOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_CallOptions ? + reinterpret_cast(value) : nullptr; } ReshapeOptionsT *AsReshapeOptions() { - return type == BuiltinOptions_ReshapeOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_ReshapeOptions ? + reinterpret_cast(value) : nullptr; } const ReshapeOptionsT *AsReshapeOptions() const { - return type == BuiltinOptions_ReshapeOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_ReshapeOptions ? + reinterpret_cast(value) : nullptr; } SkipGramOptionsT *AsSkipGramOptions() { - return type == BuiltinOptions_SkipGramOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SkipGramOptions ? + reinterpret_cast(value) : nullptr; } const SkipGramOptionsT *AsSkipGramOptions() const { - return type == BuiltinOptions_SkipGramOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SkipGramOptions ? + reinterpret_cast(value) : nullptr; } SpaceToDepthOptionsT *AsSpaceToDepthOptions() { - return type == BuiltinOptions_SpaceToDepthOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SpaceToDepthOptions ? + reinterpret_cast(value) : nullptr; } const SpaceToDepthOptionsT *AsSpaceToDepthOptions() const { - return type == BuiltinOptions_SpaceToDepthOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SpaceToDepthOptions ? + reinterpret_cast(value) : nullptr; } EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() { - return type == BuiltinOptions_EmbeddingLookupSparseOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_EmbeddingLookupSparseOptions ? + reinterpret_cast(value) : nullptr; } const EmbeddingLookupSparseOptionsT *AsEmbeddingLookupSparseOptions() const { - return type == BuiltinOptions_EmbeddingLookupSparseOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_EmbeddingLookupSparseOptions ? + reinterpret_cast(value) : nullptr; } MulOptionsT *AsMulOptions() { - return type == BuiltinOptions_MulOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_MulOptions ? + reinterpret_cast(value) : nullptr; } const MulOptionsT *AsMulOptions() const { - return type == BuiltinOptions_MulOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_MulOptions ? + reinterpret_cast(value) : nullptr; } PadOptionsT *AsPadOptions() { - return type == BuiltinOptions_PadOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_PadOptions ? + reinterpret_cast(value) : nullptr; } const PadOptionsT *AsPadOptions() const { - return type == BuiltinOptions_PadOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_PadOptions ? + reinterpret_cast(value) : nullptr; } GatherOptionsT *AsGatherOptions() { - return type == BuiltinOptions_GatherOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_GatherOptions ? + reinterpret_cast(value) : nullptr; } const GatherOptionsT *AsGatherOptions() const { - return type == BuiltinOptions_GatherOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_GatherOptions ? + reinterpret_cast(value) : nullptr; } BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() { - return type == BuiltinOptions_BatchToSpaceNDOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_BatchToSpaceNDOptions ? + reinterpret_cast(value) : nullptr; } const BatchToSpaceNDOptionsT *AsBatchToSpaceNDOptions() const { - return type == BuiltinOptions_BatchToSpaceNDOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_BatchToSpaceNDOptions ? + reinterpret_cast(value) : nullptr; } SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() { - return type == BuiltinOptions_SpaceToBatchNDOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SpaceToBatchNDOptions ? + reinterpret_cast(value) : nullptr; } const SpaceToBatchNDOptionsT *AsSpaceToBatchNDOptions() const { - return type == BuiltinOptions_SpaceToBatchNDOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SpaceToBatchNDOptions ? + reinterpret_cast(value) : nullptr; } TransposeOptionsT *AsTransposeOptions() { - return type == BuiltinOptions_TransposeOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_TransposeOptions ? + reinterpret_cast(value) : nullptr; } const TransposeOptionsT *AsTransposeOptions() const { - return type == BuiltinOptions_TransposeOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_TransposeOptions ? + reinterpret_cast(value) : nullptr; } MeanOptionsT *AsMeanOptions() { - return type == BuiltinOptions_MeanOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_MeanOptions ? + reinterpret_cast(value) : nullptr; } const MeanOptionsT *AsMeanOptions() const { - return type == BuiltinOptions_MeanOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_MeanOptions ? + reinterpret_cast(value) : nullptr; } SubOptionsT *AsSubOptions() { - return type == BuiltinOptions_SubOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SubOptions ? + reinterpret_cast(value) : nullptr; } const SubOptionsT *AsSubOptions() const { - return type == BuiltinOptions_SubOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SubOptions ? + reinterpret_cast(value) : nullptr; } DivOptionsT *AsDivOptions() { - return type == BuiltinOptions_DivOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_DivOptions ? + reinterpret_cast(value) : nullptr; } const DivOptionsT *AsDivOptions() const { - return type == BuiltinOptions_DivOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_DivOptions ? + reinterpret_cast(value) : nullptr; } SqueezeOptionsT *AsSqueezeOptions() { - return type == BuiltinOptions_SqueezeOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SqueezeOptions ? + reinterpret_cast(value) : nullptr; } const SqueezeOptionsT *AsSqueezeOptions() const { - return type == BuiltinOptions_SqueezeOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SqueezeOptions ? + reinterpret_cast(value) : nullptr; } SequenceRNNOptionsT *AsSequenceRNNOptions() { - return type == BuiltinOptions_SequenceRNNOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SequenceRNNOptions ? + reinterpret_cast(value) : nullptr; } const SequenceRNNOptionsT *AsSequenceRNNOptions() const { - return type == BuiltinOptions_SequenceRNNOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_SequenceRNNOptions ? + reinterpret_cast(value) : nullptr; } StridedSliceOptionsT *AsStridedSliceOptions() { - return type == BuiltinOptions_StridedSliceOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_StridedSliceOptions ? + reinterpret_cast(value) : nullptr; } const StridedSliceOptionsT *AsStridedSliceOptions() const { - return type == BuiltinOptions_StridedSliceOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_StridedSliceOptions ? + reinterpret_cast(value) : nullptr; } ExpOptionsT *AsExpOptions() { - return type == BuiltinOptions_ExpOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_ExpOptions ? + reinterpret_cast(value) : nullptr; } const ExpOptionsT *AsExpOptions() const { - return type == BuiltinOptions_ExpOptions - ? reinterpret_cast(value) - : nullptr; + return type == BuiltinOptions_ExpOptions ? + reinterpret_cast(value) : nullptr; + } + TopKV2OptionsT *AsTopKV2Options() { + return type == BuiltinOptions_TopKV2Options ? + reinterpret_cast(value) : nullptr; + } + const TopKV2OptionsT *AsTopKV2Options() const { + return type == BuiltinOptions_TopKV2Options ? + reinterpret_cast(value) : nullptr; } }; -bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, - BuiltinOptions type); -bool VerifyBuiltinOptionsVector( - flatbuffers::Verifier &verifier, - const flatbuffers::Vector> *values, - const flatbuffers::Vector *types); +bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); +bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types); enum Padding { Padding_SAME = 0, @@ -1021,12 +937,19 @@ enum Padding { }; inline Padding (&EnumValuesPadding())[2] { - static Padding values[] = {Padding_SAME, Padding_VALID}; + static Padding values[] = { + Padding_SAME, + Padding_VALID + }; return values; } inline const char **EnumNamesPadding() { - static const char *names[] = {"SAME", "VALID", nullptr}; + static const char *names[] = { + "SAME", + "VALID", + nullptr + }; return names; } @@ -1048,15 +971,26 @@ enum ActivationFunctionType { inline ActivationFunctionType (&EnumValuesActivationFunctionType())[6] { static ActivationFunctionType values[] = { - ActivationFunctionType_NONE, ActivationFunctionType_RELU, - ActivationFunctionType_RELU_N1_TO_1, ActivationFunctionType_RELU6, - ActivationFunctionType_TANH, ActivationFunctionType_SIGN_BIT}; + ActivationFunctionType_NONE, + ActivationFunctionType_RELU, + ActivationFunctionType_RELU_N1_TO_1, + ActivationFunctionType_RELU6, + ActivationFunctionType_TANH, + ActivationFunctionType_SIGN_BIT + }; return values; } inline const char **EnumNamesActivationFunctionType() { - static const char *names[] = {"NONE", "RELU", "RELU_N1_TO_1", "RELU6", - "TANH", "SIGN_BIT", nullptr}; + static const char *names[] = { + "NONE", + "RELU", + "RELU_N1_TO_1", + "RELU6", + "TANH", + "SIGN_BIT", + nullptr + }; return names; } @@ -1074,14 +1008,21 @@ enum LSHProjectionType { }; inline LSHProjectionType (&EnumValuesLSHProjectionType())[3] { - static LSHProjectionType values[] = {LSHProjectionType_UNKNOWN, - LSHProjectionType_SPARSE, - LSHProjectionType_DENSE}; + static LSHProjectionType values[] = { + LSHProjectionType_UNKNOWN, + LSHProjectionType_SPARSE, + LSHProjectionType_DENSE + }; return values; } inline const char **EnumNamesLSHProjectionType() { - static const char *names[] = {"UNKNOWN", "SPARSE", "DENSE", nullptr}; + static const char *names[] = { + "UNKNOWN", + "SPARSE", + "DENSE", + nullptr + }; return names; } @@ -1099,13 +1040,21 @@ enum CombinerType { }; inline CombinerType (&EnumValuesCombinerType())[3] { - static CombinerType values[] = {CombinerType_SUM, CombinerType_MEAN, - CombinerType_SQRTN}; + static CombinerType values[] = { + CombinerType_SUM, + CombinerType_MEAN, + CombinerType_SQRTN + }; return values; } inline const char **EnumNamesCombinerType() { - static const char *names[] = {"SUM", "MEAN", "SQRTN", nullptr}; + static const char *names[] = { + "SUM", + "MEAN", + "SQRTN", + nullptr + }; return names; } @@ -1121,12 +1070,17 @@ enum CustomOptionsFormat { }; inline CustomOptionsFormat (&EnumValuesCustomOptionsFormat())[1] { - static CustomOptionsFormat values[] = {CustomOptionsFormat_FLEXBUFFERS}; + static CustomOptionsFormat values[] = { + CustomOptionsFormat_FLEXBUFFERS + }; return values; } inline const char **EnumNamesCustomOptionsFormat() { - static const char *names[] = {"FLEXBUFFERS", nullptr}; + static const char *names[] = { + "FLEXBUFFERS", + nullptr + }; return names; } @@ -1141,13 +1095,18 @@ struct QuantizationParametersT : public flatbuffers::NativeTable { std::vector max; std::vector scale; std::vector zero_point; - QuantizationParametersT() {} + QuantizationParametersT() { + } }; -struct QuantizationParameters FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct QuantizationParameters FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef QuantizationParametersT NativeTableType; - enum { VT_MIN = 4, VT_MAX = 6, VT_SCALE = 8, VT_ZERO_POINT = 10 }; + enum { + VT_MIN = 4, + VT_MAX = 6, + VT_SCALE = 8, + VT_ZERO_POINT = 10 + }; const flatbuffers::Vector *min() const { return GetPointer *>(VT_MIN); } @@ -1161,20 +1120,20 @@ struct QuantizationParameters FLATBUFFERS_FINAL_CLASS return GetPointer *>(VT_ZERO_POINT); } bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_MIN) && - verifier.Verify(min()) && VerifyOffset(verifier, VT_MAX) && - verifier.Verify(max()) && VerifyOffset(verifier, VT_SCALE) && - verifier.Verify(scale()) && VerifyOffset(verifier, VT_ZERO_POINT) && - verifier.Verify(zero_point()) && verifier.EndTable(); - } - QuantizationParametersT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - QuantizationParametersT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_MIN) && + verifier.Verify(min()) && + VerifyOffset(verifier, VT_MAX) && + verifier.Verify(max()) && + VerifyOffset(verifier, VT_SCALE) && + verifier.Verify(scale()) && + VerifyOffset(verifier, VT_ZERO_POINT) && + verifier.Verify(zero_point()) && + verifier.EndTable(); + } + QuantizationParametersT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct QuantizationParametersBuilder { @@ -1189,16 +1148,14 @@ struct QuantizationParametersBuilder { void add_scale(flatbuffers::Offset> scale) { fbb_.AddOffset(QuantizationParameters::VT_SCALE, scale); } - void add_zero_point( - flatbuffers::Offset> zero_point) { + void add_zero_point(flatbuffers::Offset> zero_point) { fbb_.AddOffset(QuantizationParameters::VT_ZERO_POINT, zero_point); } explicit QuantizationParametersBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } - QuantizationParametersBuilder &operator=( - const QuantizationParametersBuilder &); + QuantizationParametersBuilder &operator=(const QuantizationParametersBuilder &); flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); @@ -1220,23 +1177,21 @@ inline flatbuffers::Offset CreateQuantizationParameters( return builder_.Finish(); } -inline flatbuffers::Offset -CreateQuantizationParametersDirect( +inline flatbuffers::Offset CreateQuantizationParametersDirect( flatbuffers::FlatBufferBuilder &_fbb, const std::vector *min = nullptr, const std::vector *max = nullptr, const std::vector *scale = nullptr, const std::vector *zero_point = nullptr) { return tflite::CreateQuantizationParameters( - _fbb, min ? _fbb.CreateVector(*min) : 0, + _fbb, + min ? _fbb.CreateVector(*min) : 0, max ? _fbb.CreateVector(*max) : 0, scale ? _fbb.CreateVector(*scale) : 0, zero_point ? _fbb.CreateVector(*zero_point) : 0); } -flatbuffers::Offset CreateQuantizationParameters( - flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct TensorT : public flatbuffers::NativeTable { typedef Tensor TableType; @@ -1245,7 +1200,10 @@ struct TensorT : public flatbuffers::NativeTable { uint32_t buffer; std::string name; std::unique_ptr quantization; - TensorT() : type(TensorType_FLOAT32), buffer(0) {} + TensorT() + : type(TensorType_FLOAT32), + buffer(0) { + } }; struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { @@ -1263,7 +1221,9 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { TensorType type() const { return static_cast(GetField(VT_TYPE, 0)); } - uint32_t buffer() const { return GetField(VT_BUFFER, 0); } + uint32_t buffer() const { + return GetField(VT_BUFFER, 0); + } const flatbuffers::String *name() const { return GetPointer(VT_NAME); } @@ -1271,20 +1231,20 @@ struct Tensor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { return GetPointer(VT_QUANTIZATION); } bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SHAPE) && - verifier.Verify(shape()) && VerifyField(verifier, VT_TYPE) && + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_SHAPE) && + verifier.Verify(shape()) && + VerifyField(verifier, VT_TYPE) && VerifyField(verifier, VT_BUFFER) && - VerifyOffset(verifier, VT_NAME) && verifier.Verify(name()) && + VerifyOffset(verifier, VT_NAME) && + verifier.Verify(name()) && VerifyOffset(verifier, VT_QUANTIZATION) && - verifier.VerifyTable(quantization()) && verifier.EndTable(); + verifier.VerifyTable(quantization()) && + verifier.EndTable(); } - TensorT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver = - nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + TensorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct TensorBuilder { @@ -1302,11 +1262,11 @@ struct TensorBuilder { void add_name(flatbuffers::Offset name) { fbb_.AddOffset(Tensor::VT_NAME, name); } - void add_quantization( - flatbuffers::Offset quantization) { + void add_quantization(flatbuffers::Offset quantization) { fbb_.AddOffset(Tensor::VT_QUANTIZATION, quantization); } - explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { + explicit TensorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } TensorBuilder &operator=(const TensorBuilder &); @@ -1320,7 +1280,8 @@ struct TensorBuilder { inline flatbuffers::Offset CreateTensor( flatbuffers::FlatBufferBuilder &_fbb, flatbuffers::Offset> shape = 0, - TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, + TensorType type = TensorType_FLOAT32, + uint32_t buffer = 0, flatbuffers::Offset name = 0, flatbuffers::Offset quantization = 0) { TensorBuilder builder_(_fbb); @@ -1335,17 +1296,20 @@ inline flatbuffers::Offset CreateTensor( inline flatbuffers::Offset CreateTensorDirect( flatbuffers::FlatBufferBuilder &_fbb, const std::vector *shape = nullptr, - TensorType type = TensorType_FLOAT32, uint32_t buffer = 0, + TensorType type = TensorType_FLOAT32, + uint32_t buffer = 0, const char *name = nullptr, flatbuffers::Offset quantization = 0) { return tflite::CreateTensor( - _fbb, shape ? _fbb.CreateVector(*shape) : 0, type, buffer, - name ? _fbb.CreateString(name) : 0, quantization); + _fbb, + shape ? _fbb.CreateVector(*shape) : 0, + type, + buffer, + name ? _fbb.CreateString(name) : 0, + quantization); } -flatbuffers::Offset CreateTensor( - flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct Conv2DOptionsT : public flatbuffers::NativeTable { typedef Conv2DOptions TableType; @@ -1357,7 +1321,8 @@ struct Conv2DOptionsT : public flatbuffers::NativeTable { : padding(Padding_SAME), stride_w(0), stride_h(0), - fused_activation_function(ActivationFunctionType_NONE) {} + fused_activation_function(ActivationFunctionType_NONE) { + } }; struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { @@ -1371,11 +1336,14 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { Padding padding() const { return static_cast(GetField(VT_PADDING, 0)); } - int32_t stride_w() const { return GetField(VT_STRIDE_W, 0); } - int32_t stride_h() const { return GetField(VT_STRIDE_H, 0); } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -1385,22 +1353,16 @@ struct Conv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - Conv2DOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - Conv2DOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + Conv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct Conv2DOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_padding(Padding padding) { - fbb_.AddElement(Conv2DOptions::VT_PADDING, - static_cast(padding), 0); + fbb_.AddElement(Conv2DOptions::VT_PADDING, static_cast(padding), 0); } void add_stride_w(int32_t stride_w) { fbb_.AddElement(Conv2DOptions::VT_STRIDE_W, stride_w, 0); @@ -1408,13 +1370,11 @@ struct Conv2DOptionsBuilder { void add_stride_h(int32_t stride_h) { fbb_.AddElement(Conv2DOptions::VT_STRIDE_H, stride_h, 0); } - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(Conv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit Conv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } Conv2DOptionsBuilder &operator=(const Conv2DOptionsBuilder &); @@ -1426,10 +1386,11 @@ struct Conv2DOptionsBuilder { }; inline flatbuffers::Offset CreateConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + flatbuffers::FlatBufferBuilder &_fbb, + Padding padding = Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { Conv2DOptionsBuilder builder_(_fbb); builder_.add_stride_h(stride_h); builder_.add_stride_w(stride_w); @@ -1438,9 +1399,7 @@ inline flatbuffers::Offset CreateConv2DOptions( return builder_.Finish(); } -flatbuffers::Offset CreateConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct Pool2DOptionsT : public flatbuffers::NativeTable { typedef Pool2DOptions TableType; @@ -1456,7 +1415,8 @@ struct Pool2DOptionsT : public flatbuffers::NativeTable { stride_h(0), filter_width(0), filter_height(0), - fused_activation_function(ActivationFunctionType_NONE) {} + fused_activation_function(ActivationFunctionType_NONE) { + } }; struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { @@ -1472,15 +1432,20 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { Padding padding() const { return static_cast(GetField(VT_PADDING, 0)); } - int32_t stride_w() const { return GetField(VT_STRIDE_W, 0); } - int32_t stride_h() const { return GetField(VT_STRIDE_H, 0); } - int32_t filter_width() const { return GetField(VT_FILTER_WIDTH, 0); } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } + int32_t filter_width() const { + return GetField(VT_FILTER_WIDTH, 0); + } int32_t filter_height() const { return GetField(VT_FILTER_HEIGHT, 0); } ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -1492,22 +1457,16 @@ struct Pool2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - Pool2DOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - Pool2DOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + Pool2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct Pool2DOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_padding(Padding padding) { - fbb_.AddElement(Pool2DOptions::VT_PADDING, - static_cast(padding), 0); + fbb_.AddElement(Pool2DOptions::VT_PADDING, static_cast(padding), 0); } void add_stride_w(int32_t stride_w) { fbb_.AddElement(Pool2DOptions::VT_STRIDE_W, stride_w, 0); @@ -1521,13 +1480,11 @@ struct Pool2DOptionsBuilder { void add_filter_height(int32_t filter_height) { fbb_.AddElement(Pool2DOptions::VT_FILTER_HEIGHT, filter_height, 0); } - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(Pool2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit Pool2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } Pool2DOptionsBuilder &operator=(const Pool2DOptionsBuilder &); @@ -1539,11 +1496,13 @@ struct Pool2DOptionsBuilder { }; inline flatbuffers::Offset CreatePool2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, int32_t filter_width = 0, + flatbuffers::FlatBufferBuilder &_fbb, + Padding padding = Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + int32_t filter_width = 0, int32_t filter_height = 0, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { Pool2DOptionsBuilder builder_(_fbb); builder_.add_filter_height(filter_height); builder_.add_filter_width(filter_width); @@ -1554,9 +1513,7 @@ inline flatbuffers::Offset CreatePool2DOptions( return builder_.Finish(); } -flatbuffers::Offset CreatePool2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable { typedef DepthwiseConv2DOptions TableType; @@ -1570,11 +1527,11 @@ struct DepthwiseConv2DOptionsT : public flatbuffers::NativeTable { stride_w(0), stride_h(0), depth_multiplier(0), - fused_activation_function(ActivationFunctionType_NONE) {} + fused_activation_function(ActivationFunctionType_NONE) { + } }; -struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef DepthwiseConv2DOptionsT NativeTableType; enum { VT_PADDING = 4, @@ -1586,14 +1543,17 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS Padding padding() const { return static_cast(GetField(VT_PADDING, 0)); } - int32_t stride_w() const { return GetField(VT_STRIDE_W, 0); } - int32_t stride_h() const { return GetField(VT_STRIDE_H, 0); } + int32_t stride_w() const { + return GetField(VT_STRIDE_W, 0); + } + int32_t stride_h() const { + return GetField(VT_STRIDE_H, 0); + } int32_t depth_multiplier() const { return GetField(VT_DEPTH_MULTIPLIER, 0); } ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -1604,22 +1564,16 @@ struct DepthwiseConv2DOptions FLATBUFFERS_FINAL_CLASS VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - DepthwiseConv2DOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - DepthwiseConv2DOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + DepthwiseConv2DOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct DepthwiseConv2DOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_padding(Padding padding) { - fbb_.AddElement(DepthwiseConv2DOptions::VT_PADDING, - static_cast(padding), 0); + fbb_.AddElement(DepthwiseConv2DOptions::VT_PADDING, static_cast(padding), 0); } void add_stride_w(int32_t stride_w) { fbb_.AddElement(DepthwiseConv2DOptions::VT_STRIDE_W, stride_w, 0); @@ -1628,21 +1582,16 @@ struct DepthwiseConv2DOptionsBuilder { fbb_.AddElement(DepthwiseConv2DOptions::VT_STRIDE_H, stride_h, 0); } void add_depth_multiplier(int32_t depth_multiplier) { - fbb_.AddElement(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, - depth_multiplier, 0); + fbb_.AddElement(DepthwiseConv2DOptions::VT_DEPTH_MULTIPLIER, depth_multiplier, 0); } - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement( - DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(DepthwiseConv2DOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit DepthwiseConv2DOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } - DepthwiseConv2DOptionsBuilder &operator=( - const DepthwiseConv2DOptionsBuilder &); + DepthwiseConv2DOptionsBuilder &operator=(const DepthwiseConv2DOptionsBuilder &); flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); @@ -1651,10 +1600,12 @@ struct DepthwiseConv2DOptionsBuilder { }; inline flatbuffers::Offset CreateDepthwiseConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, Padding padding = Padding_SAME, - int32_t stride_w = 0, int32_t stride_h = 0, int32_t depth_multiplier = 0, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + flatbuffers::FlatBufferBuilder &_fbb, + Padding padding = Padding_SAME, + int32_t stride_w = 0, + int32_t stride_h = 0, + int32_t depth_multiplier = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { DepthwiseConv2DOptionsBuilder builder_(_fbb); builder_.add_depth_multiplier(depth_multiplier); builder_.add_stride_h(stride_h); @@ -1664,34 +1615,33 @@ inline flatbuffers::Offset CreateDepthwiseConv2DOptions( return builder_.Finish(); } -flatbuffers::Offset CreateDepthwiseConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct ConcatEmbeddingsOptionsT : public flatbuffers::NativeTable { typedef ConcatEmbeddingsOptions TableType; int32_t num_channels; std::vector num_columns_per_channel; std::vector embedding_dim_per_channel; - ConcatEmbeddingsOptionsT() : num_channels(0) {} + ConcatEmbeddingsOptionsT() + : num_channels(0) { + } }; -struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef ConcatEmbeddingsOptionsT NativeTableType; enum { VT_NUM_CHANNELS = 4, VT_NUM_COLUMNS_PER_CHANNEL = 6, VT_EMBEDDING_DIM_PER_CHANNEL = 8 }; - int32_t num_channels() const { return GetField(VT_NUM_CHANNELS, 0); } + int32_t num_channels() const { + return GetField(VT_NUM_CHANNELS, 0); + } const flatbuffers::Vector *num_columns_per_channel() const { - return GetPointer *>( - VT_NUM_COLUMNS_PER_CHANNEL); + return GetPointer *>(VT_NUM_COLUMNS_PER_CHANNEL); } const flatbuffers::Vector *embedding_dim_per_channel() const { - return GetPointer *>( - VT_EMBEDDING_DIM_PER_CHANNEL); + return GetPointer *>(VT_EMBEDDING_DIM_PER_CHANNEL); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -1699,43 +1649,31 @@ struct ConcatEmbeddingsOptions FLATBUFFERS_FINAL_CLASS VerifyOffset(verifier, VT_NUM_COLUMNS_PER_CHANNEL) && verifier.Verify(num_columns_per_channel()) && VerifyOffset(verifier, VT_EMBEDDING_DIM_PER_CHANNEL) && - verifier.Verify(embedding_dim_per_channel()) && verifier.EndTable(); + verifier.Verify(embedding_dim_per_channel()) && + verifier.EndTable(); } - ConcatEmbeddingsOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - ConcatEmbeddingsOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + ConcatEmbeddingsOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct ConcatEmbeddingsOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_num_channels(int32_t num_channels) { - fbb_.AddElement(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, - num_channels, 0); + fbb_.AddElement(ConcatEmbeddingsOptions::VT_NUM_CHANNELS, num_channels, 0); } - void add_num_columns_per_channel( - flatbuffers::Offset> - num_columns_per_channel) { - fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, - num_columns_per_channel); + void add_num_columns_per_channel(flatbuffers::Offset> num_columns_per_channel) { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_NUM_COLUMNS_PER_CHANNEL, num_columns_per_channel); } - void add_embedding_dim_per_channel( - flatbuffers::Offset> - embedding_dim_per_channel) { - fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, - embedding_dim_per_channel); + void add_embedding_dim_per_channel(flatbuffers::Offset> embedding_dim_per_channel) { + fbb_.AddOffset(ConcatEmbeddingsOptions::VT_EMBEDDING_DIM_PER_CHANNEL, embedding_dim_per_channel); } explicit ConcatEmbeddingsOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } - ConcatEmbeddingsOptionsBuilder &operator=( - const ConcatEmbeddingsOptionsBuilder &); + ConcatEmbeddingsOptionsBuilder &operator=(const ConcatEmbeddingsOptionsBuilder &); flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); @@ -1743,13 +1681,11 @@ struct ConcatEmbeddingsOptionsBuilder { } }; -inline flatbuffers::Offset -CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, - int32_t num_channels = 0, - flatbuffers::Offset> - num_columns_per_channel = 0, - flatbuffers::Offset> - embedding_dim_per_channel = 0) { +inline flatbuffers::Offset CreateConcatEmbeddingsOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_channels = 0, + flatbuffers::Offset> num_columns_per_channel = 0, + flatbuffers::Offset> embedding_dim_per_channel = 0) { ConcatEmbeddingsOptionsBuilder builder_(_fbb); builder_.add_embedding_dim_per_channel(embedding_dim_per_channel); builder_.add_num_columns_per_channel(num_columns_per_channel); @@ -1757,61 +1693,54 @@ CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, return builder_.Finish(); } -inline flatbuffers::Offset -CreateConcatEmbeddingsOptionsDirect( - flatbuffers::FlatBufferBuilder &_fbb, int32_t num_channels = 0, +inline flatbuffers::Offset CreateConcatEmbeddingsOptionsDirect( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_channels = 0, const std::vector *num_columns_per_channel = nullptr, const std::vector *embedding_dim_per_channel = nullptr) { return tflite::CreateConcatEmbeddingsOptions( - _fbb, num_channels, - num_columns_per_channel - ? _fbb.CreateVector(*num_columns_per_channel) - : 0, - embedding_dim_per_channel - ? _fbb.CreateVector(*embedding_dim_per_channel) - : 0); + _fbb, + num_channels, + num_columns_per_channel ? _fbb.CreateVector(*num_columns_per_channel) : 0, + embedding_dim_per_channel ? _fbb.CreateVector(*embedding_dim_per_channel) : 0); } -flatbuffers::Offset CreateConcatEmbeddingsOptions( - flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct LSHProjectionOptionsT : public flatbuffers::NativeTable { typedef LSHProjectionOptions TableType; LSHProjectionType type; - LSHProjectionOptionsT() : type(LSHProjectionType_UNKNOWN) {} + LSHProjectionOptionsT() + : type(LSHProjectionType_UNKNOWN) { + } }; -struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct LSHProjectionOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef LSHProjectionOptionsT NativeTableType; - enum { VT_TYPE = 4 }; + enum { + VT_TYPE = 4 + }; LSHProjectionType type() const { return static_cast(GetField(VT_TYPE, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField(verifier, VT_TYPE) && verifier.EndTable(); + VerifyField(verifier, VT_TYPE) && + verifier.EndTable(); } - LSHProjectionOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - LSHProjectionOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + LSHProjectionOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct LSHProjectionOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_type(LSHProjectionType type) { - fbb_.AddElement(LSHProjectionOptions::VT_TYPE, - static_cast(type), 0); + fbb_.AddElement(LSHProjectionOptions::VT_TYPE, static_cast(type), 0); } explicit LSHProjectionOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } LSHProjectionOptionsBuilder &operator=(const LSHProjectionOptionsBuilder &); @@ -1830,25 +1759,29 @@ inline flatbuffers::Offset CreateLSHProjectionOptions( return builder_.Finish(); } -flatbuffers::Offset CreateLSHProjectionOptions( - flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct SVDFOptionsT : public flatbuffers::NativeTable { typedef SVDFOptions TableType; int32_t rank; ActivationFunctionType fused_activation_function; SVDFOptionsT() - : rank(0), fused_activation_function(ActivationFunctionType_NONE) {} + : rank(0), + fused_activation_function(ActivationFunctionType_NONE) { + } }; struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SVDFOptionsT NativeTableType; - enum { VT_RANK = 4, VT_FUSED_ACTIVATION_FUNCTION = 6 }; - int32_t rank() const { return GetField(VT_RANK, 0); } + enum { + VT_RANK = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + int32_t rank() const { + return GetField(VT_RANK, 0); + } ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -1856,14 +1789,9 @@ struct SVDFOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - SVDFOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - SVDFOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + SVDFOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct SVDFOptionsBuilder { @@ -1872,13 +1800,11 @@ struct SVDFOptionsBuilder { void add_rank(int32_t rank) { fbb_.AddElement(SVDFOptions::VT_RANK, rank, 0); } - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(SVDFOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit SVDFOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } SVDFOptionsBuilder &operator=(const SVDFOptionsBuilder &); @@ -1890,57 +1816,51 @@ struct SVDFOptionsBuilder { }; inline flatbuffers::Offset CreateSVDFOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t rank = 0, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + flatbuffers::FlatBufferBuilder &_fbb, + int32_t rank = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { SVDFOptionsBuilder builder_(_fbb); builder_.add_rank(rank); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } -flatbuffers::Offset CreateSVDFOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct RNNOptionsT : public flatbuffers::NativeTable { typedef RNNOptions TableType; ActivationFunctionType fused_activation_function; - RNNOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {} + RNNOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } }; struct RNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef RNNOptionsT NativeTableType; - enum { VT_FUSED_ACTIVATION_FUNCTION = 4 }; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - RNNOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - RNNOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + RNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct RNNOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(RNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit RNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } RNNOptionsBuilder &operator=(const RNNOptionsBuilder &); @@ -1953,16 +1873,13 @@ struct RNNOptionsBuilder { inline flatbuffers::Offset CreateRNNOptions( flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { RNNOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } -flatbuffers::Offset CreateRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct SequenceRNNOptionsT : public flatbuffers::NativeTable { typedef SequenceRNNOptions TableType; @@ -1970,16 +1887,21 @@ struct SequenceRNNOptionsT : public flatbuffers::NativeTable { ActivationFunctionType fused_activation_function; SequenceRNNOptionsT() : time_major(false), - fused_activation_function(ActivationFunctionType_NONE) {} + fused_activation_function(ActivationFunctionType_NONE) { + } }; struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SequenceRNNOptionsT NativeTableType; - enum { VT_TIME_MAJOR = 4, VT_FUSED_ACTIVATION_FUNCTION = 6 }; - bool time_major() const { return GetField(VT_TIME_MAJOR, 0) != 0; } + enum { + VT_TIME_MAJOR = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + bool time_major() const { + return GetField(VT_TIME_MAJOR, 0) != 0; + } ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -1987,30 +1909,22 @@ struct SequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - SequenceRNNOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - SequenceRNNOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + SequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct SequenceRNNOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_time_major(bool time_major) { - fbb_.AddElement(SequenceRNNOptions::VT_TIME_MAJOR, - static_cast(time_major), 0); + fbb_.AddElement(SequenceRNNOptions::VT_TIME_MAJOR, static_cast(time_major), 0); } - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(SequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit SequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } SequenceRNNOptionsBuilder &operator=(const SequenceRNNOptionsBuilder &); @@ -2022,18 +1936,16 @@ struct SequenceRNNOptionsBuilder { }; inline flatbuffers::Offset CreateSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + flatbuffers::FlatBufferBuilder &_fbb, + bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { SequenceRNNOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); builder_.add_time_major(time_major); return builder_.Finish(); } -flatbuffers::Offset CreateSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct BidirectionalSequenceRNNOptionsT : public flatbuffers::NativeTable { typedef BidirectionalSequenceRNNOptions TableType; @@ -2041,17 +1953,21 @@ struct BidirectionalSequenceRNNOptionsT : public flatbuffers::NativeTable { ActivationFunctionType fused_activation_function; BidirectionalSequenceRNNOptionsT() : time_major(false), - fused_activation_function(ActivationFunctionType_NONE) {} + fused_activation_function(ActivationFunctionType_NONE) { + } }; -struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef BidirectionalSequenceRNNOptionsT NativeTableType; - enum { VT_TIME_MAJOR = 4, VT_FUSED_ACTIVATION_FUNCTION = 6 }; - bool time_major() const { return GetField(VT_TIME_MAJOR, 0) != 0; } + enum { + VT_TIME_MAJOR = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + bool time_major() const { + return GetField(VT_TIME_MAJOR, 0) != 0; + } ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -2059,37 +1975,25 @@ struct BidirectionalSequenceRNNOptions FLATBUFFERS_FINAL_CLASS VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - BidirectionalSequenceRNNOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - BidirectionalSequenceRNNOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, - const BidirectionalSequenceRNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + BidirectionalSequenceRNNOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct BidirectionalSequenceRNNOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_time_major(bool time_major) { - fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR, - static_cast(time_major), 0); - } - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement( - BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); - } - explicit BidirectionalSequenceRNNOptionsBuilder( - flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_TIME_MAJOR, static_cast(time_major), 0); + } + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(BidirectionalSequenceRNNOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + explicit BidirectionalSequenceRNNOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } - BidirectionalSequenceRNNOptionsBuilder &operator=( - const BidirectionalSequenceRNNOptionsBuilder &); + BidirectionalSequenceRNNOptionsBuilder &operator=(const BidirectionalSequenceRNNOptionsBuilder &); flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); @@ -2097,63 +2001,52 @@ struct BidirectionalSequenceRNNOptionsBuilder { } }; -inline flatbuffers::Offset -CreateBidirectionalSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool time_major = false, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { +inline flatbuffers::Offset CreateBidirectionalSequenceRNNOptions( + flatbuffers::FlatBufferBuilder &_fbb, + bool time_major = false, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { BidirectionalSequenceRNNOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); builder_.add_time_major(time_major); return builder_.Finish(); } -flatbuffers::Offset -CreateBidirectionalSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, - const BidirectionalSequenceRNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateBidirectionalSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct FullyConnectedOptionsT : public flatbuffers::NativeTable { typedef FullyConnectedOptions TableType; ActivationFunctionType fused_activation_function; FullyConnectedOptionsT() - : fused_activation_function(ActivationFunctionType_NONE) {} + : fused_activation_function(ActivationFunctionType_NONE) { + } }; -struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct FullyConnectedOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef FullyConnectedOptionsT NativeTableType; - enum { VT_FUSED_ACTIVATION_FUNCTION = 4 }; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - FullyConnectedOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - FullyConnectedOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + FullyConnectedOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct FullyConnectedOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(FullyConnectedOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit FullyConnectedOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } FullyConnectedOptionsBuilder &operator=(const FullyConnectedOptionsBuilder &); @@ -2166,39 +2059,38 @@ struct FullyConnectedOptionsBuilder { inline flatbuffers::Offset CreateFullyConnectedOptions( flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { FullyConnectedOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } -flatbuffers::Offset CreateFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct SoftmaxOptionsT : public flatbuffers::NativeTable { typedef SoftmaxOptions TableType; float beta; - SoftmaxOptionsT() : beta(0.0f) {} + SoftmaxOptionsT() + : beta(0.0f) { + } }; struct SoftmaxOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SoftmaxOptionsT NativeTableType; - enum { VT_BETA = 4 }; - float beta() const { return GetField(VT_BETA, 0.0f); } + enum { + VT_BETA = 4 + }; + float beta() const { + return GetField(VT_BETA, 0.0f); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField(verifier, VT_BETA) && verifier.EndTable(); + VerifyField(verifier, VT_BETA) && + verifier.EndTable(); } - SoftmaxOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - SoftmaxOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + SoftmaxOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct SoftmaxOptionsBuilder { @@ -2208,7 +2100,7 @@ struct SoftmaxOptionsBuilder { fbb_.AddElement(SoftmaxOptions::VT_BETA, beta, 0.0f); } explicit SoftmaxOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } SoftmaxOptionsBuilder &operator=(const SoftmaxOptionsBuilder &); @@ -2220,32 +2112,36 @@ struct SoftmaxOptionsBuilder { }; inline flatbuffers::Offset CreateSoftmaxOptions( - flatbuffers::FlatBufferBuilder &_fbb, float beta = 0.0f) { + flatbuffers::FlatBufferBuilder &_fbb, + float beta = 0.0f) { SoftmaxOptionsBuilder builder_(_fbb); builder_.add_beta(beta); return builder_.Finish(); } -flatbuffers::Offset CreateSoftmaxOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct ConcatenationOptionsT : public flatbuffers::NativeTable { typedef ConcatenationOptions TableType; int32_t axis; ActivationFunctionType fused_activation_function; ConcatenationOptionsT() - : axis(0), fused_activation_function(ActivationFunctionType_NONE) {} + : axis(0), + fused_activation_function(ActivationFunctionType_NONE) { + } }; -struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef ConcatenationOptionsT NativeTableType; - enum { VT_AXIS = 4, VT_FUSED_ACTIVATION_FUNCTION = 6 }; - int32_t axis() const { return GetField(VT_AXIS, 0); } + enum { + VT_AXIS = 4, + VT_FUSED_ACTIVATION_FUNCTION = 6 + }; + int32_t axis() const { + return GetField(VT_AXIS, 0); + } ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -2253,14 +2149,9 @@ struct ConcatenationOptions FLATBUFFERS_FINAL_CLASS VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - ConcatenationOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - ConcatenationOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + ConcatenationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct ConcatenationOptionsBuilder { @@ -2269,13 +2160,11 @@ struct ConcatenationOptionsBuilder { void add_axis(int32_t axis) { fbb_.AddElement(ConcatenationOptions::VT_AXIS, axis, 0); } - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(ConcatenationOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit ConcatenationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } ConcatenationOptionsBuilder &operator=(const ConcatenationOptionsBuilder &); @@ -2287,57 +2176,51 @@ struct ConcatenationOptionsBuilder { }; inline flatbuffers::Offset CreateConcatenationOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0, + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { ConcatenationOptionsBuilder builder_(_fbb); builder_.add_axis(axis); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } -flatbuffers::Offset CreateConcatenationOptions( - flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct AddOptionsT : public flatbuffers::NativeTable { typedef AddOptions TableType; ActivationFunctionType fused_activation_function; - AddOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {} + AddOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } }; struct AddOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef AddOptionsT NativeTableType; - enum { VT_FUSED_ACTIVATION_FUNCTION = 4 }; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - AddOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - AddOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + AddOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct AddOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(AddOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit AddOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } AddOptionsBuilder &operator=(const AddOptionsBuilder &); @@ -2350,55 +2233,48 @@ struct AddOptionsBuilder { inline flatbuffers::Offset CreateAddOptions( flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { AddOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } -flatbuffers::Offset CreateAddOptions( - flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct MulOptionsT : public flatbuffers::NativeTable { typedef MulOptions TableType; ActivationFunctionType fused_activation_function; - MulOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {} + MulOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } }; struct MulOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef MulOptionsT NativeTableType; - enum { VT_FUSED_ACTIVATION_FUNCTION = 4 }; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - MulOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - MulOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + MulOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct MulOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(MulOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit MulOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } MulOptionsBuilder &operator=(const MulOptionsBuilder &); @@ -2411,55 +2287,48 @@ struct MulOptionsBuilder { inline flatbuffers::Offset CreateMulOptions( flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { MulOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } -flatbuffers::Offset CreateMulOptions( - flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct L2NormOptionsT : public flatbuffers::NativeTable { typedef L2NormOptions TableType; ActivationFunctionType fused_activation_function; - L2NormOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {} + L2NormOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } }; struct L2NormOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef L2NormOptionsT NativeTableType; - enum { VT_FUSED_ACTIVATION_FUNCTION = 4 }; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - L2NormOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - L2NormOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + L2NormOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct L2NormOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(L2NormOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit L2NormOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } L2NormOptionsBuilder &operator=(const L2NormOptionsBuilder &); @@ -2472,16 +2341,13 @@ struct L2NormOptionsBuilder { inline flatbuffers::Offset CreateL2NormOptions( flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { L2NormOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } -flatbuffers::Offset CreateL2NormOptions( - flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct LocalResponseNormalizationOptionsT : public flatbuffers::NativeTable { typedef LocalResponseNormalizationOptions TableType; @@ -2490,61 +2356,66 @@ struct LocalResponseNormalizationOptionsT : public flatbuffers::NativeTable { float alpha; float beta; LocalResponseNormalizationOptionsT() - : radius(0), bias(0.0f), alpha(0.0f), beta(0.0f) {} + : radius(0), + bias(0.0f), + alpha(0.0f), + beta(0.0f) { + } }; -struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct LocalResponseNormalizationOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef LocalResponseNormalizationOptionsT NativeTableType; - enum { VT_RADIUS = 4, VT_BIAS = 6, VT_ALPHA = 8, VT_BETA = 10 }; - int32_t radius() const { return GetField(VT_RADIUS, 0); } - float bias() const { return GetField(VT_BIAS, 0.0f); } - float alpha() const { return GetField(VT_ALPHA, 0.0f); } - float beta() const { return GetField(VT_BETA, 0.0f); } + enum { + VT_RADIUS = 4, + VT_BIAS = 6, + VT_ALPHA = 8, + VT_BETA = 10 + }; + int32_t radius() const { + return GetField(VT_RADIUS, 0); + } + float bias() const { + return GetField(VT_BIAS, 0.0f); + } + float alpha() const { + return GetField(VT_ALPHA, 0.0f); + } + float beta() const { + return GetField(VT_BETA, 0.0f); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_RADIUS) && VerifyField(verifier, VT_BIAS) && VerifyField(verifier, VT_ALPHA) && - VerifyField(verifier, VT_BETA) && verifier.EndTable(); + VerifyField(verifier, VT_BETA) && + verifier.EndTable(); } - LocalResponseNormalizationOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - LocalResponseNormalizationOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, - const LocalResponseNormalizationOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + LocalResponseNormalizationOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct LocalResponseNormalizationOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_radius(int32_t radius) { - fbb_.AddElement(LocalResponseNormalizationOptions::VT_RADIUS, - radius, 0); + fbb_.AddElement(LocalResponseNormalizationOptions::VT_RADIUS, radius, 0); } void add_bias(float bias) { - fbb_.AddElement(LocalResponseNormalizationOptions::VT_BIAS, bias, - 0.0f); + fbb_.AddElement(LocalResponseNormalizationOptions::VT_BIAS, bias, 0.0f); } void add_alpha(float alpha) { - fbb_.AddElement(LocalResponseNormalizationOptions::VT_ALPHA, alpha, - 0.0f); + fbb_.AddElement(LocalResponseNormalizationOptions::VT_ALPHA, alpha, 0.0f); } void add_beta(float beta) { - fbb_.AddElement(LocalResponseNormalizationOptions::VT_BETA, beta, - 0.0f); + fbb_.AddElement(LocalResponseNormalizationOptions::VT_BETA, beta, 0.0f); } - explicit LocalResponseNormalizationOptionsBuilder( - flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + explicit LocalResponseNormalizationOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } - LocalResponseNormalizationOptionsBuilder &operator=( - const LocalResponseNormalizationOptionsBuilder &); + LocalResponseNormalizationOptionsBuilder &operator=(const LocalResponseNormalizationOptionsBuilder &); flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); @@ -2552,10 +2423,12 @@ struct LocalResponseNormalizationOptionsBuilder { } }; -inline flatbuffers::Offset -CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, - int32_t radius = 0, float bias = 0.0f, - float alpha = 0.0f, float beta = 0.0f) { +inline flatbuffers::Offset CreateLocalResponseNormalizationOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t radius = 0, + float bias = 0.0f, + float alpha = 0.0f, + float beta = 0.0f) { LocalResponseNormalizationOptionsBuilder builder_(_fbb); builder_.add_beta(beta); builder_.add_alpha(alpha); @@ -2564,11 +2437,7 @@ CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, return builder_.Finish(); } -flatbuffers::Offset -CreateLocalResponseNormalizationOptions( - flatbuffers::FlatBufferBuilder &_fbb, - const LocalResponseNormalizationOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct LSTMOptionsT : public flatbuffers::NativeTable { typedef LSTMOptions TableType; @@ -2578,41 +2447,43 @@ struct LSTMOptionsT : public flatbuffers::NativeTable { LSTMOptionsT() : fused_activation_function(ActivationFunctionType_NONE), cell_clip(0.0f), - proj_clip(0.0f) {} + proj_clip(0.0f) { + } }; struct LSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef LSTMOptionsT NativeTableType; - enum { VT_FUSED_ACTIVATION_FUNCTION = 4, VT_CELL_CLIP = 6, VT_PROJ_CLIP = 8 }; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8 + }; ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { + return GetField(VT_CELL_CLIP, 0.0f); + } + float proj_clip() const { + return GetField(VT_PROJ_CLIP, 0.0f); } - float cell_clip() const { return GetField(VT_CELL_CLIP, 0.0f); } - float proj_clip() const { return GetField(VT_PROJ_CLIP, 0.0f); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && VerifyField(verifier, VT_CELL_CLIP) && - VerifyField(verifier, VT_PROJ_CLIP) && verifier.EndTable(); + VerifyField(verifier, VT_PROJ_CLIP) && + verifier.EndTable(); } - LSTMOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - LSTMOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + LSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct LSTMOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(LSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } void add_cell_clip(float cell_clip) { fbb_.AddElement(LSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); @@ -2621,7 +2492,7 @@ struct LSTMOptionsBuilder { fbb_.AddElement(LSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); } explicit LSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } LSTMOptionsBuilder &operator=(const LSTMOptionsBuilder &); @@ -2634,9 +2505,9 @@ struct LSTMOptionsBuilder { inline flatbuffers::Offset CreateLSTMOptions( flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE, - float cell_clip = 0.0f, float proj_clip = 0.0f) { + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, + float proj_clip = 0.0f) { LSTMOptionsBuilder builder_(_fbb); builder_.add_proj_clip(proj_clip); builder_.add_cell_clip(cell_clip); @@ -2644,20 +2515,21 @@ inline flatbuffers::Offset CreateLSTMOptions( return builder_.Finish(); } -flatbuffers::Offset CreateLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct ResizeBilinearOptionsT : public flatbuffers::NativeTable { typedef ResizeBilinearOptions TableType; bool align_corners; - ResizeBilinearOptionsT() : align_corners(false) {} + ResizeBilinearOptionsT() + : align_corners(false) { + } }; -struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef ResizeBilinearOptionsT NativeTableType; - enum { VT_ALIGN_CORNERS = 8 }; + enum { + VT_ALIGN_CORNERS = 8 + }; bool align_corners() const { return GetField(VT_ALIGN_CORNERS, 0) != 0; } @@ -2666,25 +2538,19 @@ struct ResizeBilinearOptions FLATBUFFERS_FINAL_CLASS VerifyField(verifier, VT_ALIGN_CORNERS) && verifier.EndTable(); } - ResizeBilinearOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - ResizeBilinearOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + ResizeBilinearOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct ResizeBilinearOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_align_corners(bool align_corners) { - fbb_.AddElement(ResizeBilinearOptions::VT_ALIGN_CORNERS, - static_cast(align_corners), 0); + fbb_.AddElement(ResizeBilinearOptions::VT_ALIGN_CORNERS, static_cast(align_corners), 0); } explicit ResizeBilinearOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } ResizeBilinearOptionsBuilder &operator=(const ResizeBilinearOptionsBuilder &); @@ -2696,38 +2562,39 @@ struct ResizeBilinearOptionsBuilder { }; inline flatbuffers::Offset CreateResizeBilinearOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool align_corners = false) { + flatbuffers::FlatBufferBuilder &_fbb, + bool align_corners = false) { ResizeBilinearOptionsBuilder builder_(_fbb); builder_.add_align_corners(align_corners); return builder_.Finish(); } -flatbuffers::Offset CreateResizeBilinearOptions( - flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct CallOptionsT : public flatbuffers::NativeTable { typedef CallOptions TableType; uint32_t subgraph; - CallOptionsT() : subgraph(0) {} + CallOptionsT() + : subgraph(0) { + } }; struct CallOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef CallOptionsT NativeTableType; - enum { VT_SUBGRAPH = 4 }; - uint32_t subgraph() const { return GetField(VT_SUBGRAPH, 0); } + enum { + VT_SUBGRAPH = 4 + }; + uint32_t subgraph() const { + return GetField(VT_SUBGRAPH, 0); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField(verifier, VT_SUBGRAPH) && verifier.EndTable(); + VerifyField(verifier, VT_SUBGRAPH) && + verifier.EndTable(); } - CallOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - CallOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + CallOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct CallOptionsBuilder { @@ -2737,7 +2604,7 @@ struct CallOptionsBuilder { fbb_.AddElement(CallOptions::VT_SUBGRAPH, subgraph, 0); } explicit CallOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } CallOptionsBuilder &operator=(const CallOptionsBuilder &); @@ -2749,41 +2616,37 @@ struct CallOptionsBuilder { }; inline flatbuffers::Offset CreateCallOptions( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t subgraph = 0) { + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t subgraph = 0) { CallOptionsBuilder builder_(_fbb); builder_.add_subgraph(subgraph); return builder_.Finish(); } -flatbuffers::Offset CreateCallOptions( - flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct PadOptionsT : public flatbuffers::NativeTable { typedef PadOptions TableType; - PadOptionsT() {} + PadOptionsT() { + } }; struct PadOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef PadOptionsT NativeTableType; bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && verifier.EndTable(); + return VerifyTableStart(verifier) && + verifier.EndTable(); } - PadOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - PadOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + PadOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct PadOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit PadOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } PadOptionsBuilder &operator=(const PadOptionsBuilder &); @@ -2800,45 +2663,42 @@ inline flatbuffers::Offset CreatePadOptions( return builder_.Finish(); } -flatbuffers::Offset CreatePadOptions( - flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct ReshapeOptionsT : public flatbuffers::NativeTable { typedef ReshapeOptions TableType; std::vector new_shape; - ReshapeOptionsT() {} + ReshapeOptionsT() { + } }; struct ReshapeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef ReshapeOptionsT NativeTableType; - enum { VT_NEW_SHAPE = 4 }; + enum { + VT_NEW_SHAPE = 4 + }; const flatbuffers::Vector *new_shape() const { return GetPointer *>(VT_NEW_SHAPE); } bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_NEW_SHAPE) && - verifier.Verify(new_shape()) && verifier.EndTable(); + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NEW_SHAPE) && + verifier.Verify(new_shape()) && + verifier.EndTable(); } - ReshapeOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - ReshapeOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + ReshapeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct ReshapeOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_new_shape( - flatbuffers::Offset> new_shape) { + void add_new_shape(flatbuffers::Offset> new_shape) { fbb_.AddOffset(ReshapeOptions::VT_NEW_SHAPE, new_shape); } explicit ReshapeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } ReshapeOptionsBuilder &operator=(const ReshapeOptionsBuilder &); @@ -2861,39 +2721,34 @@ inline flatbuffers::Offset CreateReshapeOptionsDirect( flatbuffers::FlatBufferBuilder &_fbb, const std::vector *new_shape = nullptr) { return tflite::CreateReshapeOptions( - _fbb, new_shape ? _fbb.CreateVector(*new_shape) : 0); + _fbb, + new_shape ? _fbb.CreateVector(*new_shape) : 0); } -flatbuffers::Offset CreateReshapeOptions( - flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct SpaceToBatchNDOptionsT : public flatbuffers::NativeTable { typedef SpaceToBatchNDOptions TableType; - SpaceToBatchNDOptionsT() {} + SpaceToBatchNDOptionsT() { + } }; -struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct SpaceToBatchNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SpaceToBatchNDOptionsT NativeTableType; bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && verifier.EndTable(); + return VerifyTableStart(verifier) && + verifier.EndTable(); } - SpaceToBatchNDOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - SpaceToBatchNDOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + SpaceToBatchNDOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SpaceToBatchNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct SpaceToBatchNDOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit SpaceToBatchNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } SpaceToBatchNDOptionsBuilder &operator=(const SpaceToBatchNDOptionsBuilder &); @@ -2910,36 +2765,30 @@ inline flatbuffers::Offset CreateSpaceToBatchNDOptions( return builder_.Finish(); } -flatbuffers::Offset CreateSpaceToBatchNDOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct BatchToSpaceNDOptionsT : public flatbuffers::NativeTable { typedef BatchToSpaceNDOptions TableType; - BatchToSpaceNDOptionsT() {} + BatchToSpaceNDOptionsT() { + } }; -struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct BatchToSpaceNDOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef BatchToSpaceNDOptionsT NativeTableType; bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && verifier.EndTable(); + return VerifyTableStart(verifier) && + verifier.EndTable(); } - BatchToSpaceNDOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - BatchToSpaceNDOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + BatchToSpaceNDOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BatchToSpaceNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct BatchToSpaceNDOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit BatchToSpaceNDOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } BatchToSpaceNDOptionsBuilder &operator=(const BatchToSpaceNDOptionsBuilder &); @@ -2956,9 +2805,7 @@ inline flatbuffers::Offset CreateBatchToSpaceNDOptions( return builder_.Finish(); } -flatbuffers::Offset CreateBatchToSpaceNDOptions( - flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct SkipGramOptionsT : public flatbuffers::NativeTable { typedef SkipGramOptions TableType; @@ -2966,13 +2813,22 @@ struct SkipGramOptionsT : public flatbuffers::NativeTable { int32_t max_skip_size; bool include_all_ngrams; SkipGramOptionsT() - : ngram_size(0), max_skip_size(0), include_all_ngrams(false) {} + : ngram_size(0), + max_skip_size(0), + include_all_ngrams(false) { + } }; struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SkipGramOptionsT NativeTableType; - enum { VT_NGRAM_SIZE = 4, VT_MAX_SKIP_SIZE = 6, VT_INCLUDE_ALL_NGRAMS = 8 }; - int32_t ngram_size() const { return GetField(VT_NGRAM_SIZE, 0); } + enum { + VT_NGRAM_SIZE = 4, + VT_MAX_SKIP_SIZE = 6, + VT_INCLUDE_ALL_NGRAMS = 8 + }; + int32_t ngram_size() const { + return GetField(VT_NGRAM_SIZE, 0); + } int32_t max_skip_size() const { return GetField(VT_MAX_SKIP_SIZE, 0); } @@ -2986,14 +2842,9 @@ struct SkipGramOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VerifyField(verifier, VT_INCLUDE_ALL_NGRAMS) && verifier.EndTable(); } - SkipGramOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - SkipGramOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + SkipGramOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct SkipGramOptionsBuilder { @@ -3003,15 +2854,13 @@ struct SkipGramOptionsBuilder { fbb_.AddElement(SkipGramOptions::VT_NGRAM_SIZE, ngram_size, 0); } void add_max_skip_size(int32_t max_skip_size) { - fbb_.AddElement(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, - 0); + fbb_.AddElement(SkipGramOptions::VT_MAX_SKIP_SIZE, max_skip_size, 0); } void add_include_all_ngrams(bool include_all_ngrams) { - fbb_.AddElement(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS, - static_cast(include_all_ngrams), 0); + fbb_.AddElement(SkipGramOptions::VT_INCLUDE_ALL_NGRAMS, static_cast(include_all_ngrams), 0); } explicit SkipGramOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } SkipGramOptionsBuilder &operator=(const SkipGramOptionsBuilder &); @@ -3023,8 +2872,10 @@ struct SkipGramOptionsBuilder { }; inline flatbuffers::Offset CreateSkipGramOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t ngram_size = 0, - int32_t max_skip_size = 0, bool include_all_ngrams = false) { + flatbuffers::FlatBufferBuilder &_fbb, + int32_t ngram_size = 0, + int32_t max_skip_size = 0, + bool include_all_ngrams = false) { SkipGramOptionsBuilder builder_(_fbb); builder_.add_max_skip_size(max_skip_size); builder_.add_ngram_size(ngram_size); @@ -3032,33 +2883,32 @@ inline flatbuffers::Offset CreateSkipGramOptions( return builder_.Finish(); } -flatbuffers::Offset CreateSkipGramOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct SpaceToDepthOptionsT : public flatbuffers::NativeTable { typedef SpaceToDepthOptions TableType; int32_t block_size; - SpaceToDepthOptionsT() : block_size(0) {} + SpaceToDepthOptionsT() + : block_size(0) { + } }; -struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct SpaceToDepthOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SpaceToDepthOptionsT NativeTableType; - enum { VT_BLOCK_SIZE = 4 }; - int32_t block_size() const { return GetField(VT_BLOCK_SIZE, 0); } + enum { + VT_BLOCK_SIZE = 4 + }; + int32_t block_size() const { + return GetField(VT_BLOCK_SIZE, 0); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField(verifier, VT_BLOCK_SIZE) && verifier.EndTable(); + VerifyField(verifier, VT_BLOCK_SIZE) && + verifier.EndTable(); } - SpaceToDepthOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - SpaceToDepthOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + SpaceToDepthOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct SpaceToDepthOptionsBuilder { @@ -3068,7 +2918,7 @@ struct SpaceToDepthOptionsBuilder { fbb_.AddElement(SpaceToDepthOptions::VT_BLOCK_SIZE, block_size, 0); } explicit SpaceToDepthOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } SpaceToDepthOptionsBuilder &operator=(const SpaceToDepthOptionsBuilder &); @@ -3080,54 +2930,49 @@ struct SpaceToDepthOptionsBuilder { }; inline flatbuffers::Offset CreateSpaceToDepthOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t block_size = 0) { + flatbuffers::FlatBufferBuilder &_fbb, + int32_t block_size = 0) { SpaceToDepthOptionsBuilder builder_(_fbb); builder_.add_block_size(block_size); return builder_.Finish(); } -flatbuffers::Offset CreateSpaceToDepthOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct SubOptionsT : public flatbuffers::NativeTable { typedef SubOptions TableType; ActivationFunctionType fused_activation_function; - SubOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {} + SubOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } }; struct SubOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SubOptionsT NativeTableType; - enum { VT_FUSED_ACTIVATION_FUNCTION = 4 }; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - SubOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - SubOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + SubOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct SubOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(SubOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(SubOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit SubOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } SubOptionsBuilder &operator=(const SubOptionsBuilder &); @@ -3140,55 +2985,48 @@ struct SubOptionsBuilder { inline flatbuffers::Offset CreateSubOptions( flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { SubOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } -flatbuffers::Offset CreateSubOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct DivOptionsT : public flatbuffers::NativeTable { typedef DivOptions TableType; ActivationFunctionType fused_activation_function; - DivOptionsT() : fused_activation_function(ActivationFunctionType_NONE) {} + DivOptionsT() + : fused_activation_function(ActivationFunctionType_NONE) { + } }; struct DivOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef DivOptionsT NativeTableType; - enum { VT_FUSED_ACTIVATION_FUNCTION = 4 }; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4 + }; ActivationFunctionType fused_activation_function() const { - return static_cast( - GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + return static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && verifier.EndTable(); } - DivOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - DivOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + DivOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct DivOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_fused_activation_function( - ActivationFunctionType fused_activation_function) { - fbb_.AddElement(DivOptions::VT_FUSED_ACTIVATION_FUNCTION, - static_cast(fused_activation_function), 0); + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(DivOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); } explicit DivOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } DivOptionsBuilder &operator=(const DivOptionsBuilder &); @@ -3201,59 +3039,91 @@ struct DivOptionsBuilder { inline flatbuffers::Offset CreateDivOptions( flatbuffers::FlatBufferBuilder &_fbb, - ActivationFunctionType fused_activation_function = - ActivationFunctionType_NONE) { + ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE) { DivOptionsBuilder builder_(_fbb); builder_.add_fused_activation_function(fused_activation_function); return builder_.Finish(); } -flatbuffers::Offset CreateDivOptions( - flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct TopKV2OptionsT : public flatbuffers::NativeTable { + typedef TopKV2Options TableType; + TopKV2OptionsT() { + } +}; + +struct TopKV2Options FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef TopKV2OptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + TopKV2OptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(TopKV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct TopKV2OptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit TopKV2OptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + TopKV2OptionsBuilder &operator=(const TopKV2OptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateTopKV2Options( + flatbuffers::FlatBufferBuilder &_fbb) { + TopKV2OptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct EmbeddingLookupSparseOptionsT : public flatbuffers::NativeTable { typedef EmbeddingLookupSparseOptions TableType; CombinerType combiner; - EmbeddingLookupSparseOptionsT() : combiner(CombinerType_SUM) {} + EmbeddingLookupSparseOptionsT() + : combiner(CombinerType_SUM) { + } }; -struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct EmbeddingLookupSparseOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef EmbeddingLookupSparseOptionsT NativeTableType; - enum { VT_COMBINER = 4 }; + enum { + VT_COMBINER = 4 + }; CombinerType combiner() const { return static_cast(GetField(VT_COMBINER, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField(verifier, VT_COMBINER) && verifier.EndTable(); + VerifyField(verifier, VT_COMBINER) && + verifier.EndTable(); } - EmbeddingLookupSparseOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - EmbeddingLookupSparseOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, - const EmbeddingLookupSparseOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + EmbeddingLookupSparseOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct EmbeddingLookupSparseOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_combiner(CombinerType combiner) { - fbb_.AddElement(EmbeddingLookupSparseOptions::VT_COMBINER, - static_cast(combiner), 0); + fbb_.AddElement(EmbeddingLookupSparseOptions::VT_COMBINER, static_cast(combiner), 0); } - explicit EmbeddingLookupSparseOptionsBuilder( - flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + explicit EmbeddingLookupSparseOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } - EmbeddingLookupSparseOptionsBuilder &operator=( - const EmbeddingLookupSparseOptionsBuilder &); + EmbeddingLookupSparseOptionsBuilder &operator=(const EmbeddingLookupSparseOptionsBuilder &); flatbuffers::Offset Finish() { const auto end = fbb_.EndTable(start_); auto o = flatbuffers::Offset(end); @@ -3261,42 +3131,40 @@ struct EmbeddingLookupSparseOptionsBuilder { } }; -inline flatbuffers::Offset -CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, - CombinerType combiner = CombinerType_SUM) { +inline flatbuffers::Offset CreateEmbeddingLookupSparseOptions( + flatbuffers::FlatBufferBuilder &_fbb, + CombinerType combiner = CombinerType_SUM) { EmbeddingLookupSparseOptionsBuilder builder_(_fbb); builder_.add_combiner(combiner); return builder_.Finish(); } -flatbuffers::Offset -CreateEmbeddingLookupSparseOptions( - flatbuffers::FlatBufferBuilder &_fbb, - const EmbeddingLookupSparseOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct GatherOptionsT : public flatbuffers::NativeTable { typedef GatherOptions TableType; int32_t axis; - GatherOptionsT() : axis(0) {} + GatherOptionsT() + : axis(0) { + } }; struct GatherOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef GatherOptionsT NativeTableType; - enum { VT_AXIS = 4 }; - int32_t axis() const { return GetField(VT_AXIS, 0); } + enum { + VT_AXIS = 4 + }; + int32_t axis() const { + return GetField(VT_AXIS, 0); + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField(verifier, VT_AXIS) && verifier.EndTable(); + VerifyField(verifier, VT_AXIS) && + verifier.EndTable(); } - GatherOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - GatherOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + GatherOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(GatherOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct GatherOptionsBuilder { @@ -3306,7 +3174,7 @@ struct GatherOptionsBuilder { fbb_.AddElement(GatherOptions::VT_AXIS, axis, 0); } explicit GatherOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } GatherOptionsBuilder &operator=(const GatherOptionsBuilder &); @@ -3318,41 +3186,37 @@ struct GatherOptionsBuilder { }; inline flatbuffers::Offset CreateGatherOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t axis = 0) { + flatbuffers::FlatBufferBuilder &_fbb, + int32_t axis = 0) { GatherOptionsBuilder builder_(_fbb); builder_.add_axis(axis); return builder_.Finish(); } -flatbuffers::Offset CreateGatherOptions( - flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct TransposeOptionsT : public flatbuffers::NativeTable { typedef TransposeOptions TableType; - TransposeOptionsT() {} + TransposeOptionsT() { + } }; struct TransposeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef TransposeOptionsT NativeTableType; bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && verifier.EndTable(); + return VerifyTableStart(verifier) && + verifier.EndTable(); } - TransposeOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - TransposeOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + TransposeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(TransposeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct TransposeOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit TransposeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } TransposeOptionsBuilder &operator=(const TransposeOptionsBuilder &); @@ -3369,35 +3233,30 @@ inline flatbuffers::Offset CreateTransposeOptions( return builder_.Finish(); } -flatbuffers::Offset CreateTransposeOptions( - flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct ExpOptionsT : public flatbuffers::NativeTable { typedef ExpOptions TableType; - ExpOptionsT() {} + ExpOptionsT() { + } }; struct ExpOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef ExpOptionsT NativeTableType; bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && verifier.EndTable(); + return VerifyTableStart(verifier) && + verifier.EndTable(); } - ExpOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - ExpOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + ExpOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct ExpOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; explicit ExpOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } ExpOptionsBuilder &operator=(const ExpOptionsBuilder &); @@ -3414,43 +3273,42 @@ inline flatbuffers::Offset CreateExpOptions( return builder_.Finish(); } -flatbuffers::Offset CreateExpOptions( - flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct MeanOptionsT : public flatbuffers::NativeTable { typedef MeanOptions TableType; bool keep_dims; - MeanOptionsT() : keep_dims(false) {} + MeanOptionsT() + : keep_dims(false) { + } }; struct MeanOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef MeanOptionsT NativeTableType; - enum { VT_KEEP_DIMS = 4 }; - bool keep_dims() const { return GetField(VT_KEEP_DIMS, 0) != 0; } + enum { + VT_KEEP_DIMS = 4 + }; + bool keep_dims() const { + return GetField(VT_KEEP_DIMS, 0) != 0; + } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && - VerifyField(verifier, VT_KEEP_DIMS) && verifier.EndTable(); + VerifyField(verifier, VT_KEEP_DIMS) && + verifier.EndTable(); } - MeanOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - MeanOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + MeanOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(MeanOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct MeanOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_keep_dims(bool keep_dims) { - fbb_.AddElement(MeanOptions::VT_KEEP_DIMS, - static_cast(keep_dims), 0); + fbb_.AddElement(MeanOptions::VT_KEEP_DIMS, static_cast(keep_dims), 0); } explicit MeanOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } MeanOptionsBuilder &operator=(const MeanOptionsBuilder &); @@ -3462,52 +3320,49 @@ struct MeanOptionsBuilder { }; inline flatbuffers::Offset CreateMeanOptions( - flatbuffers::FlatBufferBuilder &_fbb, bool keep_dims = false) { + flatbuffers::FlatBufferBuilder &_fbb, + bool keep_dims = false) { MeanOptionsBuilder builder_(_fbb); builder_.add_keep_dims(keep_dims); return builder_.Finish(); } -flatbuffers::Offset CreateMeanOptions( - flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateMeanOptions(flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct SqueezeOptionsT : public flatbuffers::NativeTable { typedef SqueezeOptions TableType; std::vector squeeze_dims; - SqueezeOptionsT() {} + SqueezeOptionsT() { + } }; struct SqueezeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef SqueezeOptionsT NativeTableType; - enum { VT_SQUEEZE_DIMS = 4 }; + enum { + VT_SQUEEZE_DIMS = 4 + }; const flatbuffers::Vector *squeeze_dims() const { return GetPointer *>(VT_SQUEEZE_DIMS); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_SQUEEZE_DIMS) && - verifier.Verify(squeeze_dims()) && verifier.EndTable(); + verifier.Verify(squeeze_dims()) && + verifier.EndTable(); } - SqueezeOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - SqueezeOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + SqueezeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SqueezeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct SqueezeOptionsBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_squeeze_dims( - flatbuffers::Offset> squeeze_dims) { + void add_squeeze_dims(flatbuffers::Offset> squeeze_dims) { fbb_.AddOffset(SqueezeOptions::VT_SQUEEZE_DIMS, squeeze_dims); } explicit SqueezeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } SqueezeOptionsBuilder &operator=(const SqueezeOptionsBuilder &); @@ -3530,12 +3385,11 @@ inline flatbuffers::Offset CreateSqueezeOptionsDirect( flatbuffers::FlatBufferBuilder &_fbb, const std::vector *squeeze_dims = nullptr) { return tflite::CreateSqueezeOptions( - _fbb, squeeze_dims ? _fbb.CreateVector(*squeeze_dims) : 0); + _fbb, + squeeze_dims ? _fbb.CreateVector(*squeeze_dims) : 0); } -flatbuffers::Offset CreateSqueezeOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct StridedSliceOptionsT : public flatbuffers::NativeTable { typedef StridedSliceOptions TableType; @@ -3549,11 +3403,11 @@ struct StridedSliceOptionsT : public flatbuffers::NativeTable { end_mask(0), ellipsis_mask(0), new_axis_mask(0), - shrink_axis_mask(0) {} + shrink_axis_mask(0) { + } }; -struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS - : private flatbuffers::Table { +struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef StridedSliceOptionsT NativeTableType; enum { VT_BEGIN_MASK = 4, @@ -3562,8 +3416,12 @@ struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS VT_NEW_AXIS_MASK = 10, VT_SHRINK_AXIS_MASK = 12 }; - int32_t begin_mask() const { return GetField(VT_BEGIN_MASK, 0); } - int32_t end_mask() const { return GetField(VT_END_MASK, 0); } + int32_t begin_mask() const { + return GetField(VT_BEGIN_MASK, 0); + } + int32_t end_mask() const { + return GetField(VT_END_MASK, 0); + } int32_t ellipsis_mask() const { return GetField(VT_ELLIPSIS_MASK, 0); } @@ -3582,14 +3440,9 @@ struct StridedSliceOptions FLATBUFFERS_FINAL_CLASS VerifyField(verifier, VT_SHRINK_AXIS_MASK) && verifier.EndTable(); } - StridedSliceOptionsT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - StridedSliceOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + StridedSliceOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(StridedSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct StridedSliceOptionsBuilder { @@ -3602,19 +3455,16 @@ struct StridedSliceOptionsBuilder { fbb_.AddElement(StridedSliceOptions::VT_END_MASK, end_mask, 0); } void add_ellipsis_mask(int32_t ellipsis_mask) { - fbb_.AddElement(StridedSliceOptions::VT_ELLIPSIS_MASK, - ellipsis_mask, 0); + fbb_.AddElement(StridedSliceOptions::VT_ELLIPSIS_MASK, ellipsis_mask, 0); } void add_new_axis_mask(int32_t new_axis_mask) { - fbb_.AddElement(StridedSliceOptions::VT_NEW_AXIS_MASK, - new_axis_mask, 0); + fbb_.AddElement(StridedSliceOptions::VT_NEW_AXIS_MASK, new_axis_mask, 0); } void add_shrink_axis_mask(int32_t shrink_axis_mask) { - fbb_.AddElement(StridedSliceOptions::VT_SHRINK_AXIS_MASK, - shrink_axis_mask, 0); + fbb_.AddElement(StridedSliceOptions::VT_SHRINK_AXIS_MASK, shrink_axis_mask, 0); } explicit StridedSliceOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } StridedSliceOptionsBuilder &operator=(const StridedSliceOptionsBuilder &); @@ -3626,8 +3476,11 @@ struct StridedSliceOptionsBuilder { }; inline flatbuffers::Offset CreateStridedSliceOptions( - flatbuffers::FlatBufferBuilder &_fbb, int32_t begin_mask = 0, - int32_t end_mask = 0, int32_t ellipsis_mask = 0, int32_t new_axis_mask = 0, + flatbuffers::FlatBufferBuilder &_fbb, + int32_t begin_mask = 0, + int32_t end_mask = 0, + int32_t ellipsis_mask = 0, + int32_t new_axis_mask = 0, int32_t shrink_axis_mask = 0) { StridedSliceOptionsBuilder builder_(_fbb); builder_.add_shrink_axis_mask(shrink_axis_mask); @@ -3638,20 +3491,23 @@ inline flatbuffers::Offset CreateStridedSliceOptions( return builder_.Finish(); } -flatbuffers::Offset CreateStridedSliceOptions( - flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; BuiltinOperator builtin_code; std::string custom_code; - OperatorCodeT() : builtin_code(BuiltinOperator_ADD) {} + OperatorCodeT() + : builtin_code(BuiltinOperator_ADD) { + } }; struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef OperatorCodeT NativeTableType; - enum { VT_BUILTIN_CODE = 4, VT_CUSTOM_CODE = 6 }; + enum { + VT_BUILTIN_CODE = 4, + VT_CUSTOM_CODE = 6 + }; BuiltinOperator builtin_code() const { return static_cast(GetField(VT_BUILTIN_CODE, 0)); } @@ -3662,30 +3518,25 @@ struct OperatorCode FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { return VerifyTableStart(verifier) && VerifyField(verifier, VT_BUILTIN_CODE) && VerifyOffset(verifier, VT_CUSTOM_CODE) && - verifier.Verify(custom_code()) && verifier.EndTable(); + verifier.Verify(custom_code()) && + verifier.EndTable(); } - OperatorCodeT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - OperatorCodeT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + OperatorCodeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct OperatorCodeBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; void add_builtin_code(BuiltinOperator builtin_code) { - fbb_.AddElement(OperatorCode::VT_BUILTIN_CODE, - static_cast(builtin_code), 0); + fbb_.AddElement(OperatorCode::VT_BUILTIN_CODE, static_cast(builtin_code), 0); } void add_custom_code(flatbuffers::Offset custom_code) { fbb_.AddOffset(OperatorCode::VT_CUSTOM_CODE, custom_code); } explicit OperatorCodeBuilder(flatbuffers::FlatBufferBuilder &_fbb) - : fbb_(_fbb) { + : fbb_(_fbb) { start_ = fbb_.StartTable(); } OperatorCodeBuilder &operator=(const OperatorCodeBuilder &); @@ -3711,12 +3562,12 @@ inline flatbuffers::Offset CreateOperatorCodeDirect( BuiltinOperator builtin_code = BuiltinOperator_ADD, const char *custom_code = nullptr) { return tflite::CreateOperatorCode( - _fbb, builtin_code, custom_code ? _fbb.CreateString(custom_code) : 0); + _fbb, + builtin_code, + custom_code ? _fbb.CreateString(custom_code) : 0); } -flatbuffers::Offset CreateOperatorCode( - flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct OperatorT : public flatbuffers::NativeTable { typedef Operator TableType; @@ -3728,7 +3579,8 @@ struct OperatorT : public flatbuffers::NativeTable { CustomOptionsFormat custom_options_format; OperatorT() : opcode_index(0), - custom_options_format(CustomOptionsFormat_FLEXBUFFERS) {} + custom_options_format(CustomOptionsFormat_FLEXBUFFERS) { + } }; struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { @@ -3752,408 +3604,276 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { return GetPointer *>(VT_OUTPUTS); } BuiltinOptions builtin_options_type() const { - return static_cast( - GetField(VT_BUILTIN_OPTIONS_TYPE, 0)); + return static_cast(GetField(VT_BUILTIN_OPTIONS_TYPE, 0)); } const void *builtin_options() const { return GetPointer(VT_BUILTIN_OPTIONS); } - template - const T *builtin_options_as() const; + template const T *builtin_options_as() const; const Conv2DOptions *builtin_options_as_Conv2DOptions() const { - return builtin_options_type() == BuiltinOptions_Conv2DOptions - ? static_cast(builtin_options()) - : nullptr; - } - const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() - const { - return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions - ? static_cast(builtin_options()) - : nullptr; - } - const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() - const { - return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_Conv2DOptions ? static_cast(builtin_options()) : nullptr; + } + const DepthwiseConv2DOptions *builtin_options_as_DepthwiseConv2DOptions() const { + return builtin_options_type() == BuiltinOptions_DepthwiseConv2DOptions ? static_cast(builtin_options()) : nullptr; + } + const ConcatEmbeddingsOptions *builtin_options_as_ConcatEmbeddingsOptions() const { + return builtin_options_type() == BuiltinOptions_ConcatEmbeddingsOptions ? static_cast(builtin_options()) : nullptr; } const LSHProjectionOptions *builtin_options_as_LSHProjectionOptions() const { - return builtin_options_type() == BuiltinOptions_LSHProjectionOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_LSHProjectionOptions ? static_cast(builtin_options()) : nullptr; } const Pool2DOptions *builtin_options_as_Pool2DOptions() const { - return builtin_options_type() == BuiltinOptions_Pool2DOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_Pool2DOptions ? static_cast(builtin_options()) : nullptr; } const SVDFOptions *builtin_options_as_SVDFOptions() const { - return builtin_options_type() == BuiltinOptions_SVDFOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_SVDFOptions ? static_cast(builtin_options()) : nullptr; } const RNNOptions *builtin_options_as_RNNOptions() const { - return builtin_options_type() == BuiltinOptions_RNNOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_RNNOptions ? static_cast(builtin_options()) : nullptr; } - const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() - const { - return builtin_options_type() == BuiltinOptions_FullyConnectedOptions - ? static_cast(builtin_options()) - : nullptr; + const FullyConnectedOptions *builtin_options_as_FullyConnectedOptions() const { + return builtin_options_type() == BuiltinOptions_FullyConnectedOptions ? static_cast(builtin_options()) : nullptr; } const SoftmaxOptions *builtin_options_as_SoftmaxOptions() const { - return builtin_options_type() == BuiltinOptions_SoftmaxOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_SoftmaxOptions ? static_cast(builtin_options()) : nullptr; } const ConcatenationOptions *builtin_options_as_ConcatenationOptions() const { - return builtin_options_type() == BuiltinOptions_ConcatenationOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_ConcatenationOptions ? static_cast(builtin_options()) : nullptr; } const AddOptions *builtin_options_as_AddOptions() const { - return builtin_options_type() == BuiltinOptions_AddOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_AddOptions ? static_cast(builtin_options()) : nullptr; } const L2NormOptions *builtin_options_as_L2NormOptions() const { - return builtin_options_type() == BuiltinOptions_L2NormOptions - ? static_cast(builtin_options()) - : nullptr; - } - const LocalResponseNormalizationOptions * - builtin_options_as_LocalResponseNormalizationOptions() const { - return builtin_options_type() == - BuiltinOptions_LocalResponseNormalizationOptions - ? static_cast( - builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_L2NormOptions ? static_cast(builtin_options()) : nullptr; + } + const LocalResponseNormalizationOptions *builtin_options_as_LocalResponseNormalizationOptions() const { + return builtin_options_type() == BuiltinOptions_LocalResponseNormalizationOptions ? static_cast(builtin_options()) : nullptr; } const LSTMOptions *builtin_options_as_LSTMOptions() const { - return builtin_options_type() == BuiltinOptions_LSTMOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_LSTMOptions ? static_cast(builtin_options()) : nullptr; } - const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() - const { - return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions - ? static_cast(builtin_options()) - : nullptr; + const ResizeBilinearOptions *builtin_options_as_ResizeBilinearOptions() const { + return builtin_options_type() == BuiltinOptions_ResizeBilinearOptions ? static_cast(builtin_options()) : nullptr; } const CallOptions *builtin_options_as_CallOptions() const { - return builtin_options_type() == BuiltinOptions_CallOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_CallOptions ? static_cast(builtin_options()) : nullptr; } const ReshapeOptions *builtin_options_as_ReshapeOptions() const { - return builtin_options_type() == BuiltinOptions_ReshapeOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_ReshapeOptions ? static_cast(builtin_options()) : nullptr; } const SkipGramOptions *builtin_options_as_SkipGramOptions() const { - return builtin_options_type() == BuiltinOptions_SkipGramOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_SkipGramOptions ? static_cast(builtin_options()) : nullptr; } const SpaceToDepthOptions *builtin_options_as_SpaceToDepthOptions() const { - return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_SpaceToDepthOptions ? static_cast(builtin_options()) : nullptr; } - const EmbeddingLookupSparseOptions * - builtin_options_as_EmbeddingLookupSparseOptions() const { - return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions - ? static_cast( - builtin_options()) - : nullptr; + const EmbeddingLookupSparseOptions *builtin_options_as_EmbeddingLookupSparseOptions() const { + return builtin_options_type() == BuiltinOptions_EmbeddingLookupSparseOptions ? static_cast(builtin_options()) : nullptr; } const MulOptions *builtin_options_as_MulOptions() const { - return builtin_options_type() == BuiltinOptions_MulOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_MulOptions ? static_cast(builtin_options()) : nullptr; } const PadOptions *builtin_options_as_PadOptions() const { - return builtin_options_type() == BuiltinOptions_PadOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_PadOptions ? static_cast(builtin_options()) : nullptr; } const GatherOptions *builtin_options_as_GatherOptions() const { - return builtin_options_type() == BuiltinOptions_GatherOptions - ? static_cast(builtin_options()) - : nullptr; - } - const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() - const { - return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions - ? static_cast(builtin_options()) - : nullptr; - } - const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() - const { - return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_GatherOptions ? static_cast(builtin_options()) : nullptr; + } + const BatchToSpaceNDOptions *builtin_options_as_BatchToSpaceNDOptions() const { + return builtin_options_type() == BuiltinOptions_BatchToSpaceNDOptions ? static_cast(builtin_options()) : nullptr; + } + const SpaceToBatchNDOptions *builtin_options_as_SpaceToBatchNDOptions() const { + return builtin_options_type() == BuiltinOptions_SpaceToBatchNDOptions ? static_cast(builtin_options()) : nullptr; } const TransposeOptions *builtin_options_as_TransposeOptions() const { - return builtin_options_type() == BuiltinOptions_TransposeOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_TransposeOptions ? static_cast(builtin_options()) : nullptr; } const MeanOptions *builtin_options_as_MeanOptions() const { - return builtin_options_type() == BuiltinOptions_MeanOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_MeanOptions ? static_cast(builtin_options()) : nullptr; } const SubOptions *builtin_options_as_SubOptions() const { - return builtin_options_type() == BuiltinOptions_SubOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_SubOptions ? static_cast(builtin_options()) : nullptr; } const DivOptions *builtin_options_as_DivOptions() const { - return builtin_options_type() == BuiltinOptions_DivOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_DivOptions ? static_cast(builtin_options()) : nullptr; } const SqueezeOptions *builtin_options_as_SqueezeOptions() const { - return builtin_options_type() == BuiltinOptions_SqueezeOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_SqueezeOptions ? static_cast(builtin_options()) : nullptr; } const SequenceRNNOptions *builtin_options_as_SequenceRNNOptions() const { - return builtin_options_type() == BuiltinOptions_SequenceRNNOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_SequenceRNNOptions ? static_cast(builtin_options()) : nullptr; } const StridedSliceOptions *builtin_options_as_StridedSliceOptions() const { - return builtin_options_type() == BuiltinOptions_StridedSliceOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_StridedSliceOptions ? static_cast(builtin_options()) : nullptr; } const ExpOptions *builtin_options_as_ExpOptions() const { - return builtin_options_type() == BuiltinOptions_ExpOptions - ? static_cast(builtin_options()) - : nullptr; + return builtin_options_type() == BuiltinOptions_ExpOptions ? static_cast(builtin_options()) : nullptr; + } + const TopKV2Options *builtin_options_as_TopKV2Options() const { + return builtin_options_type() == BuiltinOptions_TopKV2Options ? static_cast(builtin_options()) : nullptr; } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } CustomOptionsFormat custom_options_format() const { - return static_cast( - GetField(VT_CUSTOM_OPTIONS_FORMAT, 0)); + return static_cast(GetField(VT_CUSTOM_OPTIONS_FORMAT, 0)); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && VerifyField(verifier, VT_OPCODE_INDEX) && - VerifyOffset(verifier, VT_INPUTS) && verifier.Verify(inputs()) && - VerifyOffset(verifier, VT_OUTPUTS) && verifier.Verify(outputs()) && + VerifyOffset(verifier, VT_INPUTS) && + verifier.Verify(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && + verifier.Verify(outputs()) && VerifyField(verifier, VT_BUILTIN_OPTIONS_TYPE) && VerifyOffset(verifier, VT_BUILTIN_OPTIONS) && - VerifyBuiltinOptions(verifier, builtin_options(), - builtin_options_type()) && + VerifyBuiltinOptions(verifier, builtin_options(), builtin_options_type()) && VerifyOffset(verifier, VT_CUSTOM_OPTIONS) && verifier.Verify(custom_options()) && VerifyField(verifier, VT_CUSTOM_OPTIONS_FORMAT) && verifier.EndTable(); } - OperatorT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - OperatorT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + OperatorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; -template <> -inline const Conv2DOptions *Operator::builtin_options_as() - const { +template<> inline const Conv2DOptions *Operator::builtin_options_as() const { return builtin_options_as_Conv2DOptions(); } -template <> -inline const DepthwiseConv2DOptions * -Operator::builtin_options_as() const { +template<> inline const DepthwiseConv2DOptions *Operator::builtin_options_as() const { return builtin_options_as_DepthwiseConv2DOptions(); } -template <> -inline const ConcatEmbeddingsOptions * -Operator::builtin_options_as() const { +template<> inline const ConcatEmbeddingsOptions *Operator::builtin_options_as() const { return builtin_options_as_ConcatEmbeddingsOptions(); } -template <> -inline const LSHProjectionOptions * -Operator::builtin_options_as() const { +template<> inline const LSHProjectionOptions *Operator::builtin_options_as() const { return builtin_options_as_LSHProjectionOptions(); } -template <> -inline const Pool2DOptions *Operator::builtin_options_as() - const { +template<> inline const Pool2DOptions *Operator::builtin_options_as() const { return builtin_options_as_Pool2DOptions(); } -template <> -inline const SVDFOptions *Operator::builtin_options_as() const { +template<> inline const SVDFOptions *Operator::builtin_options_as() const { return builtin_options_as_SVDFOptions(); } -template <> -inline const RNNOptions *Operator::builtin_options_as() const { +template<> inline const RNNOptions *Operator::builtin_options_as() const { return builtin_options_as_RNNOptions(); } -template <> -inline const FullyConnectedOptions * -Operator::builtin_options_as() const { +template<> inline const FullyConnectedOptions *Operator::builtin_options_as() const { return builtin_options_as_FullyConnectedOptions(); } -template <> -inline const SoftmaxOptions *Operator::builtin_options_as() - const { +template<> inline const SoftmaxOptions *Operator::builtin_options_as() const { return builtin_options_as_SoftmaxOptions(); } -template <> -inline const ConcatenationOptions * -Operator::builtin_options_as() const { +template<> inline const ConcatenationOptions *Operator::builtin_options_as() const { return builtin_options_as_ConcatenationOptions(); } -template <> -inline const AddOptions *Operator::builtin_options_as() const { +template<> inline const AddOptions *Operator::builtin_options_as() const { return builtin_options_as_AddOptions(); } -template <> -inline const L2NormOptions *Operator::builtin_options_as() - const { +template<> inline const L2NormOptions *Operator::builtin_options_as() const { return builtin_options_as_L2NormOptions(); } -template <> -inline const LocalResponseNormalizationOptions * -Operator::builtin_options_as() const { +template<> inline const LocalResponseNormalizationOptions *Operator::builtin_options_as() const { return builtin_options_as_LocalResponseNormalizationOptions(); } -template <> -inline const LSTMOptions *Operator::builtin_options_as() const { +template<> inline const LSTMOptions *Operator::builtin_options_as() const { return builtin_options_as_LSTMOptions(); } -template <> -inline const ResizeBilinearOptions * -Operator::builtin_options_as() const { +template<> inline const ResizeBilinearOptions *Operator::builtin_options_as() const { return builtin_options_as_ResizeBilinearOptions(); } -template <> -inline const CallOptions *Operator::builtin_options_as() const { +template<> inline const CallOptions *Operator::builtin_options_as() const { return builtin_options_as_CallOptions(); } -template <> -inline const ReshapeOptions *Operator::builtin_options_as() - const { +template<> inline const ReshapeOptions *Operator::builtin_options_as() const { return builtin_options_as_ReshapeOptions(); } -template <> -inline const SkipGramOptions *Operator::builtin_options_as() - const { +template<> inline const SkipGramOptions *Operator::builtin_options_as() const { return builtin_options_as_SkipGramOptions(); } -template <> -inline const SpaceToDepthOptions * -Operator::builtin_options_as() const { +template<> inline const SpaceToDepthOptions *Operator::builtin_options_as() const { return builtin_options_as_SpaceToDepthOptions(); } -template <> -inline const EmbeddingLookupSparseOptions * -Operator::builtin_options_as() const { +template<> inline const EmbeddingLookupSparseOptions *Operator::builtin_options_as() const { return builtin_options_as_EmbeddingLookupSparseOptions(); } -template <> -inline const MulOptions *Operator::builtin_options_as() const { +template<> inline const MulOptions *Operator::builtin_options_as() const { return builtin_options_as_MulOptions(); } -template <> -inline const PadOptions *Operator::builtin_options_as() const { +template<> inline const PadOptions *Operator::builtin_options_as() const { return builtin_options_as_PadOptions(); } -template <> -inline const GatherOptions *Operator::builtin_options_as() - const { +template<> inline const GatherOptions *Operator::builtin_options_as() const { return builtin_options_as_GatherOptions(); } -template <> -inline const BatchToSpaceNDOptions * -Operator::builtin_options_as() const { +template<> inline const BatchToSpaceNDOptions *Operator::builtin_options_as() const { return builtin_options_as_BatchToSpaceNDOptions(); } -template <> -inline const SpaceToBatchNDOptions * -Operator::builtin_options_as() const { +template<> inline const SpaceToBatchNDOptions *Operator::builtin_options_as() const { return builtin_options_as_SpaceToBatchNDOptions(); } -template <> -inline const TransposeOptions *Operator::builtin_options_as() - const { +template<> inline const TransposeOptions *Operator::builtin_options_as() const { return builtin_options_as_TransposeOptions(); } -template <> -inline const MeanOptions *Operator::builtin_options_as() const { +template<> inline const MeanOptions *Operator::builtin_options_as() const { return builtin_options_as_MeanOptions(); } -template <> -inline const SubOptions *Operator::builtin_options_as() const { +template<> inline const SubOptions *Operator::builtin_options_as() const { return builtin_options_as_SubOptions(); } -template <> -inline const DivOptions *Operator::builtin_options_as() const { +template<> inline const DivOptions *Operator::builtin_options_as() const { return builtin_options_as_DivOptions(); } -template <> -inline const SqueezeOptions *Operator::builtin_options_as() - const { +template<> inline const SqueezeOptions *Operator::builtin_options_as() const { return builtin_options_as_SqueezeOptions(); } -template <> -inline const SequenceRNNOptions * -Operator::builtin_options_as() const { +template<> inline const SequenceRNNOptions *Operator::builtin_options_as() const { return builtin_options_as_SequenceRNNOptions(); } -template <> -inline const StridedSliceOptions * -Operator::builtin_options_as() const { +template<> inline const StridedSliceOptions *Operator::builtin_options_as() const { return builtin_options_as_StridedSliceOptions(); } -template <> -inline const ExpOptions *Operator::builtin_options_as() const { +template<> inline const ExpOptions *Operator::builtin_options_as() const { return builtin_options_as_ExpOptions(); } +template<> inline const TopKV2Options *Operator::builtin_options_as() const { + return builtin_options_as_TopKV2Options(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -4167,21 +3887,19 @@ struct OperatorBuilder { fbb_.AddOffset(Operator::VT_OUTPUTS, outputs); } void add_builtin_options_type(BuiltinOptions builtin_options_type) { - fbb_.AddElement(Operator::VT_BUILTIN_OPTIONS_TYPE, - static_cast(builtin_options_type), 0); + fbb_.AddElement(Operator::VT_BUILTIN_OPTIONS_TYPE, static_cast(builtin_options_type), 0); } void add_builtin_options(flatbuffers::Offset builtin_options) { fbb_.AddOffset(Operator::VT_BUILTIN_OPTIONS, builtin_options); } - void add_custom_options( - flatbuffers::Offset> custom_options) { + void add_custom_options(flatbuffers::Offset> custom_options) { fbb_.AddOffset(Operator::VT_CUSTOM_OPTIONS, custom_options); } void add_custom_options_format(CustomOptionsFormat custom_options_format) { - fbb_.AddElement(Operator::VT_CUSTOM_OPTIONS_FORMAT, - static_cast(custom_options_format), 0); + fbb_.AddElement(Operator::VT_CUSTOM_OPTIONS_FORMAT, static_cast(custom_options_format), 0); } - explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { + explicit OperatorBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } OperatorBuilder &operator=(const OperatorBuilder &); @@ -4193,14 +3911,14 @@ struct OperatorBuilder { }; inline flatbuffers::Offset CreateOperator( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t opcode_index = 0, flatbuffers::Offset> inputs = 0, flatbuffers::Offset> outputs = 0, BuiltinOptions builtin_options_type = BuiltinOptions_NONE, flatbuffers::Offset builtin_options = 0, flatbuffers::Offset> custom_options = 0, - CustomOptionsFormat custom_options_format = - CustomOptionsFormat_FLEXBUFFERS) { + CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS) { OperatorBuilder builder_(_fbb); builder_.add_custom_options(custom_options); builder_.add_builtin_options(builtin_options); @@ -4213,25 +3931,26 @@ inline flatbuffers::Offset CreateOperator( } inline flatbuffers::Offset CreateOperatorDirect( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t opcode_index = 0, + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t opcode_index = 0, const std::vector *inputs = nullptr, const std::vector *outputs = nullptr, BuiltinOptions builtin_options_type = BuiltinOptions_NONE, flatbuffers::Offset builtin_options = 0, const std::vector *custom_options = nullptr, - CustomOptionsFormat custom_options_format = - CustomOptionsFormat_FLEXBUFFERS) { + CustomOptionsFormat custom_options_format = CustomOptionsFormat_FLEXBUFFERS) { return tflite::CreateOperator( - _fbb, opcode_index, inputs ? _fbb.CreateVector(*inputs) : 0, - outputs ? _fbb.CreateVector(*outputs) : 0, builtin_options_type, + _fbb, + opcode_index, + inputs ? _fbb.CreateVector(*inputs) : 0, + outputs ? _fbb.CreateVector(*outputs) : 0, + builtin_options_type, builtin_options, custom_options ? _fbb.CreateVector(*custom_options) : 0, custom_options_format); } -flatbuffers::Offset CreateOperator( - flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct SubGraphT : public flatbuffers::NativeTable { typedef SubGraph TableType; @@ -4240,7 +3959,8 @@ struct SubGraphT : public flatbuffers::NativeTable { std::vector outputs; std::vector> operators; std::string name; - SubGraphT() {} + SubGraphT() { + } }; struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { @@ -4253,8 +3973,7 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VT_NAME = 12 }; const flatbuffers::Vector> *tensors() const { - return GetPointer> *>( - VT_TENSORS); + return GetPointer> *>(VT_TENSORS); } const flatbuffers::Vector *inputs() const { return GetPointer *>(VT_INPUTS); @@ -4263,41 +3982,36 @@ struct SubGraph FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { return GetPointer *>(VT_OUTPUTS); } const flatbuffers::Vector> *operators() const { - return GetPointer< - const flatbuffers::Vector> *>( - VT_OPERATORS); + return GetPointer> *>(VT_OPERATORS); } const flatbuffers::String *name() const { return GetPointer(VT_NAME); } bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_TENSORS) && + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_TENSORS) && verifier.Verify(tensors()) && verifier.VerifyVectorOfTables(tensors()) && - VerifyOffset(verifier, VT_INPUTS) && verifier.Verify(inputs()) && - VerifyOffset(verifier, VT_OUTPUTS) && verifier.Verify(outputs()) && + VerifyOffset(verifier, VT_INPUTS) && + verifier.Verify(inputs()) && + VerifyOffset(verifier, VT_OUTPUTS) && + verifier.Verify(outputs()) && VerifyOffset(verifier, VT_OPERATORS) && verifier.Verify(operators()) && verifier.VerifyVectorOfTables(operators()) && - VerifyOffset(verifier, VT_NAME) && verifier.Verify(name()) && + VerifyOffset(verifier, VT_NAME) && + verifier.Verify(name()) && verifier.EndTable(); } - SubGraphT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo( - SubGraphT *_o, - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + SubGraphT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct SubGraphBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; - void add_tensors( - flatbuffers::Offset>> - tensors) { + void add_tensors(flatbuffers::Offset>> tensors) { fbb_.AddOffset(SubGraph::VT_TENSORS, tensors); } void add_inputs(flatbuffers::Offset> inputs) { @@ -4306,15 +4020,14 @@ struct SubGraphBuilder { void add_outputs(flatbuffers::Offset> outputs) { fbb_.AddOffset(SubGraph::VT_OUTPUTS, outputs); } - void add_operators( - flatbuffers::Offset>> - operators) { + void add_operators(flatbuffers::Offset>> operators) { fbb_.AddOffset(SubGraph::VT_OPERATORS, operators); } void add_name(flatbuffers::Offset name) { fbb_.AddOffset(SubGraph::VT_NAME, name); } - explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { + explicit SubGraphBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } SubGraphBuilder &operator=(const SubGraphBuilder &); @@ -4327,12 +4040,10 @@ struct SubGraphBuilder { inline flatbuffers::Offset CreateSubGraph( flatbuffers::FlatBufferBuilder &_fbb, - flatbuffers::Offset>> - tensors = 0, + flatbuffers::Offset>> tensors = 0, flatbuffers::Offset> inputs = 0, flatbuffers::Offset> outputs = 0, - flatbuffers::Offset>> - operators = 0, + flatbuffers::Offset>> operators = 0, flatbuffers::Offset name = 0) { SubGraphBuilder builder_(_fbb); builder_.add_name(name); @@ -4355,38 +4066,36 @@ inline flatbuffers::Offset CreateSubGraphDirect( tensors ? _fbb.CreateVector>(*tensors) : 0, inputs ? _fbb.CreateVector(*inputs) : 0, outputs ? _fbb.CreateVector(*outputs) : 0, - operators ? _fbb.CreateVector>(*operators) - : 0, + operators ? _fbb.CreateVector>(*operators) : 0, name ? _fbb.CreateString(name) : 0); } -flatbuffers::Offset CreateSubGraph( - flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct BufferT : public flatbuffers::NativeTable { typedef Buffer TableType; std::vector data; - BufferT() {} + BufferT() { + } }; struct Buffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { typedef BufferT NativeTableType; - enum { VT_DATA = 4 }; + enum { + VT_DATA = 4 + }; const flatbuffers::Vector *data() const { return GetPointer *>(VT_DATA); } bool Verify(flatbuffers::Verifier &verifier) const { - return VerifyTableStart(verifier) && VerifyOffset(verifier, VT_DATA) && - verifier.Verify(data()) && verifier.EndTable(); + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_DATA) && + verifier.Verify(data()) && + verifier.EndTable(); } - BufferT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver = - nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + BufferT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct BufferBuilder { @@ -4395,7 +4104,8 @@ struct BufferBuilder { void add_data(flatbuffers::Offset> data) { fbb_.AddOffset(Buffer::VT_DATA, data); } - explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { + explicit BufferBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } BufferBuilder &operator=(const BufferBuilder &); @@ -4417,13 +4127,12 @@ inline flatbuffers::Offset CreateBuffer( inline flatbuffers::Offset CreateBufferDirect( flatbuffers::FlatBufferBuilder &_fbb, const std::vector *data = nullptr) { - return tflite::CreateBuffer(_fbb, - data ? _fbb.CreateVector(*data) : 0); + return tflite::CreateBuffer( + _fbb, + data ? _fbb.CreateVector(*data) : 0); } -flatbuffers::Offset CreateBuffer( - flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); struct ModelT : public flatbuffers::NativeTable { typedef Model TableType; @@ -4432,7 +4141,9 @@ struct ModelT : public flatbuffers::NativeTable { std::vector> subgraphs; std::string description; std::vector> buffers; - ModelT() : version(0) {} + ModelT() + : version(0) { + } }; struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { @@ -4444,24 +4155,20 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { VT_DESCRIPTION = 10, VT_BUFFERS = 12 }; - uint32_t version() const { return GetField(VT_VERSION, 0); } - const flatbuffers::Vector> *operator_codes() - const { - return GetPointer< - const flatbuffers::Vector> *>( - VT_OPERATOR_CODES); + uint32_t version() const { + return GetField(VT_VERSION, 0); + } + const flatbuffers::Vector> *operator_codes() const { + return GetPointer> *>(VT_OPERATOR_CODES); } const flatbuffers::Vector> *subgraphs() const { - return GetPointer< - const flatbuffers::Vector> *>( - VT_SUBGRAPHS); + return GetPointer> *>(VT_SUBGRAPHS); } const flatbuffers::String *description() const { return GetPointer(VT_DESCRIPTION); } const flatbuffers::Vector> *buffers() const { - return GetPointer> *>( - VT_BUFFERS); + return GetPointer> *>(VT_BUFFERS); } bool Verify(flatbuffers::Verifier &verifier) const { return VerifyTableStart(verifier) && @@ -4474,16 +4181,14 @@ struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { verifier.VerifyVectorOfTables(subgraphs()) && VerifyOffset(verifier, VT_DESCRIPTION) && verifier.Verify(description()) && - VerifyOffset(verifier, VT_BUFFERS) && verifier.Verify(buffers()) && - verifier.VerifyVectorOfTables(buffers()) && verifier.EndTable(); + VerifyOffset(verifier, VT_BUFFERS) && + verifier.Verify(buffers()) && + verifier.VerifyVectorOfTables(buffers()) && + verifier.EndTable(); } - ModelT *UnPack( - const flatbuffers::resolver_function_t *_resolver = nullptr) const; - void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver = - nullptr) const; - static flatbuffers::Offset Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); + ModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); }; struct ModelBuilder { @@ -4492,26 +4197,20 @@ struct ModelBuilder { void add_version(uint32_t version) { fbb_.AddElement(Model::VT_VERSION, version, 0); } - void add_operator_codes( - flatbuffers::Offset< - flatbuffers::Vector>> - operator_codes) { + void add_operator_codes(flatbuffers::Offset>> operator_codes) { fbb_.AddOffset(Model::VT_OPERATOR_CODES, operator_codes); } - void add_subgraphs( - flatbuffers::Offset>> - subgraphs) { + void add_subgraphs(flatbuffers::Offset>> subgraphs) { fbb_.AddOffset(Model::VT_SUBGRAPHS, subgraphs); } void add_description(flatbuffers::Offset description) { fbb_.AddOffset(Model::VT_DESCRIPTION, description); } - void add_buffers( - flatbuffers::Offset>> - buffers) { + void add_buffers(flatbuffers::Offset>> buffers) { fbb_.AddOffset(Model::VT_BUFFERS, buffers); } - explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { + explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { start_ = fbb_.StartTable(); } ModelBuilder &operator=(const ModelBuilder &); @@ -4523,14 +4222,12 @@ struct ModelBuilder { }; inline flatbuffers::Offset CreateModel( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - flatbuffers::Offset>> - operator_codes = 0, - flatbuffers::Offset>> - subgraphs = 0, + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t version = 0, + flatbuffers::Offset>> operator_codes = 0, + flatbuffers::Offset>> subgraphs = 0, flatbuffers::Offset description = 0, - flatbuffers::Offset>> - buffers = 0) { + flatbuffers::Offset>> buffers = 0) { ModelBuilder builder_(_fbb); builder_.add_buffers(buffers); builder_.add_description(description); @@ -4541,2048 +4238,1251 @@ inline flatbuffers::Offset CreateModel( } inline flatbuffers::Offset CreateModelDirect( - flatbuffers::FlatBufferBuilder &_fbb, uint32_t version = 0, - const std::vector> *operator_codes = - nullptr, + flatbuffers::FlatBufferBuilder &_fbb, + uint32_t version = 0, + const std::vector> *operator_codes = nullptr, const std::vector> *subgraphs = nullptr, const char *description = nullptr, const std::vector> *buffers = nullptr) { return tflite::CreateModel( - _fbb, version, - operator_codes ? _fbb.CreateVector>( - *operator_codes) - : 0, - subgraphs ? _fbb.CreateVector>(*subgraphs) - : 0, + _fbb, + version, + operator_codes ? _fbb.CreateVector>(*operator_codes) : 0, + subgraphs ? _fbb.CreateVector>(*subgraphs) : 0, description ? _fbb.CreateString(description) : 0, buffers ? _fbb.CreateVector>(*buffers) : 0); } -flatbuffers::Offset CreateModel( - flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, - const flatbuffers::rehasher_function_t *_rehasher = nullptr); +flatbuffers::Offset CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); -inline QuantizationParametersT *QuantizationParameters::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline QuantizationParametersT *QuantizationParameters::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new QuantizationParametersT(); UnPackTo(_o, _resolver); return _o; } -inline void QuantizationParameters::UnPackTo( - QuantizationParametersT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void QuantizationParameters::UnPackTo(QuantizationParametersT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = min(); - if (_e) { - _o->min.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->min[_i] = _e->Get(_i); - } - } - }; - { - auto _e = max(); - if (_e) { - _o->max.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->max[_i] = _e->Get(_i); - } - } - }; - { - auto _e = scale(); - if (_e) { - _o->scale.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->scale[_i] = _e->Get(_i); - } - } - }; - { - auto _e = zero_point(); - if (_e) { - _o->zero_point.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->zero_point[_i] = _e->Get(_i); - } - } - }; + { auto _e = min(); if (_e) { _o->min.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->min[_i] = _e->Get(_i); } } }; + { auto _e = max(); if (_e) { _o->max.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->max[_i] = _e->Get(_i); } } }; + { auto _e = scale(); if (_e) { _o->scale.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->scale[_i] = _e->Get(_i); } } }; + { auto _e = zero_point(); if (_e) { _o->zero_point.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->zero_point[_i] = _e->Get(_i); } } }; } -inline flatbuffers::Offset QuantizationParameters::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset QuantizationParameters::Pack(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateQuantizationParameters(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateQuantizationParameters( - flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateQuantizationParameters(flatbuffers::FlatBufferBuilder &_fbb, const QuantizationParametersT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const QuantizationParametersT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const QuantizationParametersT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _min = _o->min.size() ? _fbb.CreateVector(_o->min) : 0; auto _max = _o->max.size() ? _fbb.CreateVector(_o->max) : 0; auto _scale = _o->scale.size() ? _fbb.CreateVector(_o->scale) : 0; - auto _zero_point = - _o->zero_point.size() ? _fbb.CreateVector(_o->zero_point) : 0; - return tflite::CreateQuantizationParameters(_fbb, _min, _max, _scale, - _zero_point); + auto _zero_point = _o->zero_point.size() ? _fbb.CreateVector(_o->zero_point) : 0; + return tflite::CreateQuantizationParameters( + _fbb, + _min, + _max, + _scale, + _zero_point); } -inline TensorT *Tensor::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline TensorT *Tensor::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new TensorT(); UnPackTo(_o, _resolver); return _o; } -inline void Tensor::UnPackTo( - TensorT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void Tensor::UnPackTo(TensorT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = shape(); - if (_e) { - _o->shape.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->shape[_i] = _e->Get(_i); - } - } - }; - { - auto _e = type(); - _o->type = _e; - }; - { - auto _e = buffer(); - _o->buffer = _e; - }; - { - auto _e = name(); - if (_e) _o->name = _e->str(); - }; - { - auto _e = quantization(); - if (_e) - _o->quantization = - std::unique_ptr(_e->UnPack(_resolver)); - }; + { auto _e = shape(); if (_e) { _o->shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->shape[_i] = _e->Get(_i); } } }; + { auto _e = type(); _o->type = _e; }; + { auto _e = buffer(); _o->buffer = _e; }; + { auto _e = name(); if (_e) _o->name = _e->str(); }; + { auto _e = quantization(); if (_e) _o->quantization = std::unique_ptr(_e->UnPack(_resolver)); }; } -inline flatbuffers::Offset Tensor::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset Tensor::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TensorT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateTensor(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateTensor( - flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateTensor(flatbuffers::FlatBufferBuilder &_fbb, const TensorT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const TensorT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TensorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _shape = _o->shape.size() ? _fbb.CreateVector(_o->shape) : 0; auto _type = _o->type; auto _buffer = _o->buffer; auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name); - auto _quantization = _o->quantization - ? CreateQuantizationParameters( - _fbb, _o->quantization.get(), _rehasher) - : 0; - return tflite::CreateTensor(_fbb, _shape, _type, _buffer, _name, - _quantization); + auto _quantization = _o->quantization ? CreateQuantizationParameters(_fbb, _o->quantization.get(), _rehasher) : 0; + return tflite::CreateTensor( + _fbb, + _shape, + _type, + _buffer, + _name, + _quantization); } -inline Conv2DOptionsT *Conv2DOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline Conv2DOptionsT *Conv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new Conv2DOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void Conv2DOptions::UnPackTo( - Conv2DOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void Conv2DOptions::UnPackTo(Conv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = padding(); - _o->padding = _e; - }; - { - auto _e = stride_w(); - _o->stride_w = _e; - }; - { - auto _e = stride_h(); - _o->stride_h = _e; - }; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = padding(); _o->padding = _e; }; + { auto _e = stride_w(); _o->stride_w = _e; }; + { auto _e = stride_h(); _o->stride_h = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset Conv2DOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset Conv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateConv2DOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Conv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const Conv2DOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Conv2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _padding = _o->padding; auto _stride_w = _o->stride_w; auto _stride_h = _o->stride_h; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateConv2DOptions(_fbb, _padding, _stride_w, _stride_h, - _fused_activation_function); + return tflite::CreateConv2DOptions( + _fbb, + _padding, + _stride_w, + _stride_h, + _fused_activation_function); } -inline Pool2DOptionsT *Pool2DOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline Pool2DOptionsT *Pool2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new Pool2DOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void Pool2DOptions::UnPackTo( - Pool2DOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void Pool2DOptions::UnPackTo(Pool2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = padding(); - _o->padding = _e; - }; - { - auto _e = stride_w(); - _o->stride_w = _e; - }; - { - auto _e = stride_h(); - _o->stride_h = _e; - }; - { - auto _e = filter_width(); - _o->filter_width = _e; - }; - { - auto _e = filter_height(); - _o->filter_height = _e; - }; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = padding(); _o->padding = _e; }; + { auto _e = stride_w(); _o->stride_w = _e; }; + { auto _e = stride_h(); _o->stride_h = _e; }; + { auto _e = filter_width(); _o->filter_width = _e; }; + { auto _e = filter_height(); _o->filter_height = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset Pool2DOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset Pool2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreatePool2DOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreatePool2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreatePool2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const Pool2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const Pool2DOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const Pool2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _padding = _o->padding; auto _stride_w = _o->stride_w; auto _stride_h = _o->stride_h; auto _filter_width = _o->filter_width; auto _filter_height = _o->filter_height; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreatePool2DOptions(_fbb, _padding, _stride_w, _stride_h, - _filter_width, _filter_height, - _fused_activation_function); + return tflite::CreatePool2DOptions( + _fbb, + _padding, + _stride_w, + _stride_h, + _filter_width, + _filter_height, + _fused_activation_function); } -inline DepthwiseConv2DOptionsT *DepthwiseConv2DOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline DepthwiseConv2DOptionsT *DepthwiseConv2DOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new DepthwiseConv2DOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void DepthwiseConv2DOptions::UnPackTo( - DepthwiseConv2DOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void DepthwiseConv2DOptions::UnPackTo(DepthwiseConv2DOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = padding(); - _o->padding = _e; - }; - { - auto _e = stride_w(); - _o->stride_w = _e; - }; - { - auto _e = stride_h(); - _o->stride_h = _e; - }; - { - auto _e = depth_multiplier(); - _o->depth_multiplier = _e; - }; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = padding(); _o->padding = _e; }; + { auto _e = stride_w(); _o->stride_w = _e; }; + { auto _e = stride_h(); _o->stride_h = _e; }; + { auto _e = depth_multiplier(); _o->depth_multiplier = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset DepthwiseConv2DOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset DepthwiseConv2DOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateDepthwiseConv2DOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateDepthwiseConv2DOptions( - flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateDepthwiseConv2DOptions(flatbuffers::FlatBufferBuilder &_fbb, const DepthwiseConv2DOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const DepthwiseConv2DOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DepthwiseConv2DOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _padding = _o->padding; auto _stride_w = _o->stride_w; auto _stride_h = _o->stride_h; auto _depth_multiplier = _o->depth_multiplier; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateDepthwiseConv2DOptions(_fbb, _padding, _stride_w, - _stride_h, _depth_multiplier, - _fused_activation_function); + return tflite::CreateDepthwiseConv2DOptions( + _fbb, + _padding, + _stride_w, + _stride_h, + _depth_multiplier, + _fused_activation_function); } -inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline ConcatEmbeddingsOptionsT *ConcatEmbeddingsOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new ConcatEmbeddingsOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void ConcatEmbeddingsOptions::UnPackTo( - ConcatEmbeddingsOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void ConcatEmbeddingsOptions::UnPackTo(ConcatEmbeddingsOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = num_channels(); - _o->num_channels = _e; - }; - { - auto _e = num_columns_per_channel(); - if (_e) { - _o->num_columns_per_channel.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->num_columns_per_channel[_i] = _e->Get(_i); - } - } - }; - { - auto _e = embedding_dim_per_channel(); - if (_e) { - _o->embedding_dim_per_channel.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->embedding_dim_per_channel[_i] = _e->Get(_i); - } - } - }; + { auto _e = num_channels(); _o->num_channels = _e; }; + { auto _e = num_columns_per_channel(); if (_e) { _o->num_columns_per_channel.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->num_columns_per_channel[_i] = _e->Get(_i); } } }; + { auto _e = embedding_dim_per_channel(); if (_e) { _o->embedding_dim_per_channel.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->embedding_dim_per_channel[_i] = _e->Get(_i); } } }; } -inline flatbuffers::Offset -ConcatEmbeddingsOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset ConcatEmbeddingsOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateConcatEmbeddingsOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset -CreateConcatEmbeddingsOptions( - flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateConcatEmbeddingsOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatEmbeddingsOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const ConcatEmbeddingsOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatEmbeddingsOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _num_channels = _o->num_channels; - auto _num_columns_per_channel = - _o->num_columns_per_channel.size() - ? _fbb.CreateVector(_o->num_columns_per_channel) - : 0; - auto _embedding_dim_per_channel = - _o->embedding_dim_per_channel.size() - ? _fbb.CreateVector(_o->embedding_dim_per_channel) - : 0; - return tflite::CreateConcatEmbeddingsOptions(_fbb, _num_channels, - _num_columns_per_channel, - _embedding_dim_per_channel); -} - -inline LSHProjectionOptionsT *LSHProjectionOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { + auto _num_columns_per_channel = _o->num_columns_per_channel.size() ? _fbb.CreateVector(_o->num_columns_per_channel) : 0; + auto _embedding_dim_per_channel = _o->embedding_dim_per_channel.size() ? _fbb.CreateVector(_o->embedding_dim_per_channel) : 0; + return tflite::CreateConcatEmbeddingsOptions( + _fbb, + _num_channels, + _num_columns_per_channel, + _embedding_dim_per_channel); +} + +inline LSHProjectionOptionsT *LSHProjectionOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new LSHProjectionOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void LSHProjectionOptions::UnPackTo( - LSHProjectionOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void LSHProjectionOptions::UnPackTo(LSHProjectionOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = type(); - _o->type = _e; - }; + { auto _e = type(); _o->type = _e; }; } -inline flatbuffers::Offset LSHProjectionOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset LSHProjectionOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateLSHProjectionOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateLSHProjectionOptions( - flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateLSHProjectionOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSHProjectionOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const LSHProjectionOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSHProjectionOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _type = _o->type; - return tflite::CreateLSHProjectionOptions(_fbb, _type); + return tflite::CreateLSHProjectionOptions( + _fbb, + _type); } -inline SVDFOptionsT *SVDFOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline SVDFOptionsT *SVDFOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new SVDFOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void SVDFOptions::UnPackTo( - SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void SVDFOptions::UnPackTo(SVDFOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = rank(); - _o->rank = _e; - }; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = rank(); _o->rank = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset SVDFOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset SVDFOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateSVDFOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateSVDFOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateSVDFOptions(flatbuffers::FlatBufferBuilder &_fbb, const SVDFOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const SVDFOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SVDFOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _rank = _o->rank; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateSVDFOptions(_fbb, _rank, _fused_activation_function); + return tflite::CreateSVDFOptions( + _fbb, + _rank, + _fused_activation_function); } -inline RNNOptionsT *RNNOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline RNNOptionsT *RNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new RNNOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void RNNOptions::UnPackTo( - RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void RNNOptions::UnPackTo(RNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset RNNOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset RNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateRNNOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const RNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const RNNOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateRNNOptions(_fbb, _fused_activation_function); + return tflite::CreateRNNOptions( + _fbb, + _fused_activation_function); } -inline SequenceRNNOptionsT *SequenceRNNOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline SequenceRNNOptionsT *SequenceRNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new SequenceRNNOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void SequenceRNNOptions::UnPackTo( - SequenceRNNOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void SequenceRNNOptions::UnPackTo(SequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = time_major(); - _o->time_major = _e; - }; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = time_major(); _o->time_major = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset SequenceRNNOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset SequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateSequenceRNNOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const SequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const SequenceRNNOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SequenceRNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _time_major = _o->time_major; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateSequenceRNNOptions(_fbb, _time_major, - _fused_activation_function); + return tflite::CreateSequenceRNNOptions( + _fbb, + _time_major, + _fused_activation_function); } -inline BidirectionalSequenceRNNOptionsT * -BidirectionalSequenceRNNOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline BidirectionalSequenceRNNOptionsT *BidirectionalSequenceRNNOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new BidirectionalSequenceRNNOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void BidirectionalSequenceRNNOptions::UnPackTo( - BidirectionalSequenceRNNOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void BidirectionalSequenceRNNOptions::UnPackTo(BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = time_major(); - _o->time_major = _e; - }; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = time_major(); _o->time_major = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset -BidirectionalSequenceRNNOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, - const BidirectionalSequenceRNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset BidirectionalSequenceRNNOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateBidirectionalSequenceRNNOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset -CreateBidirectionalSequenceRNNOptions( - flatbuffers::FlatBufferBuilder &_fbb, - const BidirectionalSequenceRNNOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateBidirectionalSequenceRNNOptions(flatbuffers::FlatBufferBuilder &_fbb, const BidirectionalSequenceRNNOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const BidirectionalSequenceRNNOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BidirectionalSequenceRNNOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _time_major = _o->time_major; auto _fused_activation_function = _o->fused_activation_function; return tflite::CreateBidirectionalSequenceRNNOptions( - _fbb, _time_major, _fused_activation_function); + _fbb, + _time_major, + _fused_activation_function); } -inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline FullyConnectedOptionsT *FullyConnectedOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new FullyConnectedOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void FullyConnectedOptions::UnPackTo( - FullyConnectedOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void FullyConnectedOptions::UnPackTo(FullyConnectedOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset FullyConnectedOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset FullyConnectedOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateFullyConnectedOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateFullyConnectedOptions( - flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateFullyConnectedOptions(flatbuffers::FlatBufferBuilder &_fbb, const FullyConnectedOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const FullyConnectedOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FullyConnectedOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateFullyConnectedOptions(_fbb, _fused_activation_function); + return tflite::CreateFullyConnectedOptions( + _fbb, + _fused_activation_function); } -inline SoftmaxOptionsT *SoftmaxOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline SoftmaxOptionsT *SoftmaxOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new SoftmaxOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void SoftmaxOptions::UnPackTo( - SoftmaxOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void SoftmaxOptions::UnPackTo(SoftmaxOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = beta(); - _o->beta = _e; - }; + { auto _e = beta(); _o->beta = _e; }; } -inline flatbuffers::Offset SoftmaxOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset SoftmaxOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateSoftmaxOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateSoftmaxOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const SoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const SoftmaxOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SoftmaxOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _beta = _o->beta; - return tflite::CreateSoftmaxOptions(_fbb, _beta); + return tflite::CreateSoftmaxOptions( + _fbb, + _beta); } -inline ConcatenationOptionsT *ConcatenationOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline ConcatenationOptionsT *ConcatenationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new ConcatenationOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void ConcatenationOptions::UnPackTo( - ConcatenationOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void ConcatenationOptions::UnPackTo(ConcatenationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = axis(); - _o->axis = _e; - }; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = axis(); _o->axis = _e; }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset ConcatenationOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset ConcatenationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateConcatenationOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateConcatenationOptions( - flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateConcatenationOptions(flatbuffers::FlatBufferBuilder &_fbb, const ConcatenationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const ConcatenationOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ConcatenationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _axis = _o->axis; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateConcatenationOptions(_fbb, _axis, - _fused_activation_function); + return tflite::CreateConcatenationOptions( + _fbb, + _axis, + _fused_activation_function); } -inline AddOptionsT *AddOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline AddOptionsT *AddOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new AddOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void AddOptions::UnPackTo( - AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void AddOptions::UnPackTo(AddOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset AddOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset AddOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateAddOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateAddOptions( - flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateAddOptions(flatbuffers::FlatBufferBuilder &_fbb, const AddOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const AddOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AddOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateAddOptions(_fbb, _fused_activation_function); + return tflite::CreateAddOptions( + _fbb, + _fused_activation_function); } -inline MulOptionsT *MulOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline MulOptionsT *MulOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new MulOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void MulOptions::UnPackTo( - MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void MulOptions::UnPackTo(MulOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset MulOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset MulOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateMulOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateMulOptions( - flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateMulOptions(flatbuffers::FlatBufferBuilder &_fbb, const MulOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const MulOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MulOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateMulOptions(_fbb, _fused_activation_function); + return tflite::CreateMulOptions( + _fbb, + _fused_activation_function); } -inline L2NormOptionsT *L2NormOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline L2NormOptionsT *L2NormOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new L2NormOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void L2NormOptions::UnPackTo( - L2NormOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void L2NormOptions::UnPackTo(L2NormOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset L2NormOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset L2NormOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateL2NormOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateL2NormOptions( - flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateL2NormOptions(flatbuffers::FlatBufferBuilder &_fbb, const L2NormOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const L2NormOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const L2NormOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateL2NormOptions(_fbb, _fused_activation_function); + return tflite::CreateL2NormOptions( + _fbb, + _fused_activation_function); } -inline LocalResponseNormalizationOptionsT * -LocalResponseNormalizationOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline LocalResponseNormalizationOptionsT *LocalResponseNormalizationOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new LocalResponseNormalizationOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void LocalResponseNormalizationOptions::UnPackTo( - LocalResponseNormalizationOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void LocalResponseNormalizationOptions::UnPackTo(LocalResponseNormalizationOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = radius(); - _o->radius = _e; - }; - { - auto _e = bias(); - _o->bias = _e; - }; - { - auto _e = alpha(); - _o->alpha = _e; - }; - { - auto _e = beta(); - _o->beta = _e; - }; + { auto _e = radius(); _o->radius = _e; }; + { auto _e = bias(); _o->bias = _e; }; + { auto _e = alpha(); _o->alpha = _e; }; + { auto _e = beta(); _o->beta = _e; }; } -inline flatbuffers::Offset -LocalResponseNormalizationOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, - const LocalResponseNormalizationOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset LocalResponseNormalizationOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateLocalResponseNormalizationOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset -CreateLocalResponseNormalizationOptions( - flatbuffers::FlatBufferBuilder &_fbb, - const LocalResponseNormalizationOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateLocalResponseNormalizationOptions(flatbuffers::FlatBufferBuilder &_fbb, const LocalResponseNormalizationOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const LocalResponseNormalizationOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LocalResponseNormalizationOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _radius = _o->radius; auto _bias = _o->bias; auto _alpha = _o->alpha; auto _beta = _o->beta; - return tflite::CreateLocalResponseNormalizationOptions(_fbb, _radius, _bias, - _alpha, _beta); + return tflite::CreateLocalResponseNormalizationOptions( + _fbb, + _radius, + _bias, + _alpha, + _beta); } -inline LSTMOptionsT *LSTMOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline LSTMOptionsT *LSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new LSTMOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void LSTMOptions::UnPackTo( - LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void LSTMOptions::UnPackTo(LSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; - { - auto _e = cell_clip(); - _o->cell_clip = _e; - }; - { - auto _e = proj_clip(); - _o->proj_clip = _e; - }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; + { auto _e = cell_clip(); _o->cell_clip = _e; }; + { auto _e = proj_clip(); _o->proj_clip = _e; }; } -inline flatbuffers::Offset LSTMOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset LSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateLSTMOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateLSTMOptions( - flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const LSTMOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const LSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _fused_activation_function = _o->fused_activation_function; auto _cell_clip = _o->cell_clip; auto _proj_clip = _o->proj_clip; - return tflite::CreateLSTMOptions(_fbb, _fused_activation_function, _cell_clip, - _proj_clip); + return tflite::CreateLSTMOptions( + _fbb, + _fused_activation_function, + _cell_clip, + _proj_clip); } -inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline ResizeBilinearOptionsT *ResizeBilinearOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new ResizeBilinearOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void ResizeBilinearOptions::UnPackTo( - ResizeBilinearOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void ResizeBilinearOptions::UnPackTo(ResizeBilinearOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = align_corners(); - _o->align_corners = _e; - }; + { auto _e = align_corners(); _o->align_corners = _e; }; } -inline flatbuffers::Offset ResizeBilinearOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset ResizeBilinearOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateResizeBilinearOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateResizeBilinearOptions( - flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateResizeBilinearOptions(flatbuffers::FlatBufferBuilder &_fbb, const ResizeBilinearOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const ResizeBilinearOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ResizeBilinearOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _align_corners = _o->align_corners; - return tflite::CreateResizeBilinearOptions(_fbb, _align_corners); + return tflite::CreateResizeBilinearOptions( + _fbb, + _align_corners); } -inline CallOptionsT *CallOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline CallOptionsT *CallOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new CallOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void CallOptions::UnPackTo( - CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void CallOptions::UnPackTo(CallOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = subgraph(); - _o->subgraph = _e; - }; + { auto _e = subgraph(); _o->subgraph = _e; }; } -inline flatbuffers::Offset CallOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CallOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateCallOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateCallOptions( - flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateCallOptions(flatbuffers::FlatBufferBuilder &_fbb, const CallOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const CallOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CallOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _subgraph = _o->subgraph; - return tflite::CreateCallOptions(_fbb, _subgraph); + return tflite::CreateCallOptions( + _fbb, + _subgraph); } -inline PadOptionsT *PadOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline PadOptionsT *PadOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new PadOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void PadOptions::UnPackTo( - PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void PadOptions::UnPackTo(PadOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; } -inline flatbuffers::Offset PadOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset PadOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreatePadOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreatePadOptions( - flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreatePadOptions(flatbuffers::FlatBufferBuilder &_fbb, const PadOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const PadOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; - return tflite::CreatePadOptions(_fbb); + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PadOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreatePadOptions( + _fbb); } -inline ReshapeOptionsT *ReshapeOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline ReshapeOptionsT *ReshapeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new ReshapeOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void ReshapeOptions::UnPackTo( - ReshapeOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void ReshapeOptions::UnPackTo(ReshapeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = new_shape(); - if (_e) { - _o->new_shape.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->new_shape[_i] = _e->Get(_i); - } - } - }; + { auto _e = new_shape(); if (_e) { _o->new_shape.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->new_shape[_i] = _e->Get(_i); } } }; } -inline flatbuffers::Offset ReshapeOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset ReshapeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateReshapeOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateReshapeOptions( - flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateReshapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ReshapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const ReshapeOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ReshapeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _new_shape = _o->new_shape.size() ? _fbb.CreateVector(_o->new_shape) : 0; - return tflite::CreateReshapeOptions(_fbb, _new_shape); + return tflite::CreateReshapeOptions( + _fbb, + _new_shape); } -inline SpaceToBatchNDOptionsT *SpaceToBatchNDOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline SpaceToBatchNDOptionsT *SpaceToBatchNDOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new SpaceToBatchNDOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void SpaceToBatchNDOptions::UnPackTo( - SpaceToBatchNDOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void SpaceToBatchNDOptions::UnPackTo(SpaceToBatchNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; } -inline flatbuffers::Offset SpaceToBatchNDOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset SpaceToBatchNDOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateSpaceToBatchNDOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateSpaceToBatchNDOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateSpaceToBatchNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToBatchNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const SpaceToBatchNDOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; - return tflite::CreateSpaceToBatchNDOptions(_fbb); + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SpaceToBatchNDOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateSpaceToBatchNDOptions( + _fbb); } -inline BatchToSpaceNDOptionsT *BatchToSpaceNDOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline BatchToSpaceNDOptionsT *BatchToSpaceNDOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new BatchToSpaceNDOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void BatchToSpaceNDOptions::UnPackTo( - BatchToSpaceNDOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void BatchToSpaceNDOptions::UnPackTo(BatchToSpaceNDOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; } -inline flatbuffers::Offset BatchToSpaceNDOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset BatchToSpaceNDOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateBatchToSpaceNDOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateBatchToSpaceNDOptions( - flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateBatchToSpaceNDOptions(flatbuffers::FlatBufferBuilder &_fbb, const BatchToSpaceNDOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const BatchToSpaceNDOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; - return tflite::CreateBatchToSpaceNDOptions(_fbb); + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BatchToSpaceNDOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateBatchToSpaceNDOptions( + _fbb); } -inline SkipGramOptionsT *SkipGramOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline SkipGramOptionsT *SkipGramOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new SkipGramOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void SkipGramOptions::UnPackTo( - SkipGramOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void SkipGramOptions::UnPackTo(SkipGramOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = ngram_size(); - _o->ngram_size = _e; - }; - { - auto _e = max_skip_size(); - _o->max_skip_size = _e; - }; - { - auto _e = include_all_ngrams(); - _o->include_all_ngrams = _e; - }; + { auto _e = ngram_size(); _o->ngram_size = _e; }; + { auto _e = max_skip_size(); _o->max_skip_size = _e; }; + { auto _e = include_all_ngrams(); _o->include_all_ngrams = _e; }; } -inline flatbuffers::Offset SkipGramOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset SkipGramOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateSkipGramOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateSkipGramOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateSkipGramOptions(flatbuffers::FlatBufferBuilder &_fbb, const SkipGramOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const SkipGramOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SkipGramOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _ngram_size = _o->ngram_size; auto _max_skip_size = _o->max_skip_size; auto _include_all_ngrams = _o->include_all_ngrams; - return tflite::CreateSkipGramOptions(_fbb, _ngram_size, _max_skip_size, - _include_all_ngrams); + return tflite::CreateSkipGramOptions( + _fbb, + _ngram_size, + _max_skip_size, + _include_all_ngrams); } -inline SpaceToDepthOptionsT *SpaceToDepthOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline SpaceToDepthOptionsT *SpaceToDepthOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new SpaceToDepthOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void SpaceToDepthOptions::UnPackTo( - SpaceToDepthOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void SpaceToDepthOptions::UnPackTo(SpaceToDepthOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = block_size(); - _o->block_size = _e; - }; + { auto _e = block_size(); _o->block_size = _e; }; } -inline flatbuffers::Offset SpaceToDepthOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset SpaceToDepthOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateSpaceToDepthOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateSpaceToDepthOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateSpaceToDepthOptions(flatbuffers::FlatBufferBuilder &_fbb, const SpaceToDepthOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const SpaceToDepthOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SpaceToDepthOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _block_size = _o->block_size; - return tflite::CreateSpaceToDepthOptions(_fbb, _block_size); + return tflite::CreateSpaceToDepthOptions( + _fbb, + _block_size); } -inline SubOptionsT *SubOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline SubOptionsT *SubOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new SubOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void SubOptions::UnPackTo( - SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void SubOptions::UnPackTo(SubOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset SubOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset SubOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateSubOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateSubOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateSubOptions(flatbuffers::FlatBufferBuilder &_fbb, const SubOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const SubOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SubOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateSubOptions(_fbb, _fused_activation_function); + return tflite::CreateSubOptions( + _fbb, + _fused_activation_function); } -inline DivOptionsT *DivOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline DivOptionsT *DivOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new DivOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void DivOptions::UnPackTo( - DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void DivOptions::UnPackTo(DivOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = fused_activation_function(); - _o->fused_activation_function = _e; - }; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; } -inline flatbuffers::Offset DivOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset DivOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateDivOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateDivOptions( - flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateDivOptions(flatbuffers::FlatBufferBuilder &_fbb, const DivOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const DivOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DivOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _fused_activation_function = _o->fused_activation_function; - return tflite::CreateDivOptions(_fbb, _fused_activation_function); + return tflite::CreateDivOptions( + _fbb, + _fused_activation_function); +} + +inline TopKV2OptionsT *TopKV2Options::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new TopKV2OptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void TopKV2Options::UnPackTo(TopKV2OptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset TopKV2Options::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateTopKV2Options(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateTopKV2Options(flatbuffers::FlatBufferBuilder &_fbb, const TopKV2OptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TopKV2OptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateTopKV2Options( + _fbb); } -inline EmbeddingLookupSparseOptionsT *EmbeddingLookupSparseOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline EmbeddingLookupSparseOptionsT *EmbeddingLookupSparseOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new EmbeddingLookupSparseOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void EmbeddingLookupSparseOptions::UnPackTo( - EmbeddingLookupSparseOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void EmbeddingLookupSparseOptions::UnPackTo(EmbeddingLookupSparseOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = combiner(); - _o->combiner = _e; - }; + { auto _e = combiner(); _o->combiner = _e; }; } -inline flatbuffers::Offset -EmbeddingLookupSparseOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, - const EmbeddingLookupSparseOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset EmbeddingLookupSparseOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateEmbeddingLookupSparseOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset -CreateEmbeddingLookupSparseOptions( - flatbuffers::FlatBufferBuilder &_fbb, - const EmbeddingLookupSparseOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateEmbeddingLookupSparseOptions(flatbuffers::FlatBufferBuilder &_fbb, const EmbeddingLookupSparseOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const EmbeddingLookupSparseOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const EmbeddingLookupSparseOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _combiner = _o->combiner; - return tflite::CreateEmbeddingLookupSparseOptions(_fbb, _combiner); + return tflite::CreateEmbeddingLookupSparseOptions( + _fbb, + _combiner); } -inline GatherOptionsT *GatherOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline GatherOptionsT *GatherOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new GatherOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void GatherOptions::UnPackTo( - GatherOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void GatherOptions::UnPackTo(GatherOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = axis(); - _o->axis = _e; - }; + { auto _e = axis(); _o->axis = _e; }; } -inline flatbuffers::Offset GatherOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset GatherOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateGatherOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateGatherOptions( - flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateGatherOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const GatherOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const GatherOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _axis = _o->axis; - return tflite::CreateGatherOptions(_fbb, _axis); + return tflite::CreateGatherOptions( + _fbb, + _axis); } -inline TransposeOptionsT *TransposeOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline TransposeOptionsT *TransposeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new TransposeOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void TransposeOptions::UnPackTo( - TransposeOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void TransposeOptions::UnPackTo(TransposeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; } -inline flatbuffers::Offset TransposeOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset TransposeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateTransposeOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateTransposeOptions( - flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateTransposeOptions(flatbuffers::FlatBufferBuilder &_fbb, const TransposeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const TransposeOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; - return tflite::CreateTransposeOptions(_fbb); + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TransposeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateTransposeOptions( + _fbb); } -inline ExpOptionsT *ExpOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline ExpOptionsT *ExpOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new ExpOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void ExpOptions::UnPackTo( - ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void ExpOptions::UnPackTo(ExpOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; } -inline flatbuffers::Offset ExpOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset ExpOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateExpOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateExpOptions( - flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateExpOptions(flatbuffers::FlatBufferBuilder &_fbb, const ExpOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const ExpOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; - return tflite::CreateExpOptions(_fbb); + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ExpOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateExpOptions( + _fbb); } -inline MeanOptionsT *MeanOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline MeanOptionsT *MeanOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new MeanOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void MeanOptions::UnPackTo( - MeanOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void MeanOptions::UnPackTo(MeanOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = keep_dims(); - _o->keep_dims = _e; - }; + { auto _e = keep_dims(); _o->keep_dims = _e; }; } -inline flatbuffers::Offset MeanOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset MeanOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateMeanOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateMeanOptions( - flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateMeanOptions(flatbuffers::FlatBufferBuilder &_fbb, const MeanOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const MeanOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const MeanOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _keep_dims = _o->keep_dims; - return tflite::CreateMeanOptions(_fbb, _keep_dims); + return tflite::CreateMeanOptions( + _fbb, + _keep_dims); } -inline SqueezeOptionsT *SqueezeOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline SqueezeOptionsT *SqueezeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new SqueezeOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void SqueezeOptions::UnPackTo( - SqueezeOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void SqueezeOptions::UnPackTo(SqueezeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = squeeze_dims(); - if (_e) { - _o->squeeze_dims.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->squeeze_dims[_i] = _e->Get(_i); - } - } - }; + { auto _e = squeeze_dims(); if (_e) { _o->squeeze_dims.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->squeeze_dims[_i] = _e->Get(_i); } } }; } -inline flatbuffers::Offset SqueezeOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset SqueezeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateSqueezeOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateSqueezeOptions( - flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const SqueezeOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; - auto _squeeze_dims = - _o->squeeze_dims.size() ? _fbb.CreateVector(_o->squeeze_dims) : 0; - return tflite::CreateSqueezeOptions(_fbb, _squeeze_dims); -} - -inline StridedSliceOptionsT *StridedSliceOptions::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SqueezeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _squeeze_dims = _o->squeeze_dims.size() ? _fbb.CreateVector(_o->squeeze_dims) : 0; + return tflite::CreateSqueezeOptions( + _fbb, + _squeeze_dims); +} + +inline StridedSliceOptionsT *StridedSliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new StridedSliceOptionsT(); UnPackTo(_o, _resolver); return _o; } -inline void StridedSliceOptions::UnPackTo( - StridedSliceOptionsT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void StridedSliceOptions::UnPackTo(StridedSliceOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = begin_mask(); - _o->begin_mask = _e; - }; - { - auto _e = end_mask(); - _o->end_mask = _e; - }; - { - auto _e = ellipsis_mask(); - _o->ellipsis_mask = _e; - }; - { - auto _e = new_axis_mask(); - _o->new_axis_mask = _e; - }; - { - auto _e = shrink_axis_mask(); - _o->shrink_axis_mask = _e; - }; + { auto _e = begin_mask(); _o->begin_mask = _e; }; + { auto _e = end_mask(); _o->end_mask = _e; }; + { auto _e = ellipsis_mask(); _o->ellipsis_mask = _e; }; + { auto _e = new_axis_mask(); _o->new_axis_mask = _e; }; + { auto _e = shrink_axis_mask(); _o->shrink_axis_mask = _e; }; } -inline flatbuffers::Offset StridedSliceOptions::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset StridedSliceOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateStridedSliceOptions(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateStridedSliceOptions( - flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateStridedSliceOptions(flatbuffers::FlatBufferBuilder &_fbb, const StridedSliceOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const StridedSliceOptionsT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const StridedSliceOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _begin_mask = _o->begin_mask; auto _end_mask = _o->end_mask; auto _ellipsis_mask = _o->ellipsis_mask; auto _new_axis_mask = _o->new_axis_mask; auto _shrink_axis_mask = _o->shrink_axis_mask; - return tflite::CreateStridedSliceOptions(_fbb, _begin_mask, _end_mask, - _ellipsis_mask, _new_axis_mask, - _shrink_axis_mask); + return tflite::CreateStridedSliceOptions( + _fbb, + _begin_mask, + _end_mask, + _ellipsis_mask, + _new_axis_mask, + _shrink_axis_mask); } -inline OperatorCodeT *OperatorCode::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); return _o; } -inline void OperatorCode::UnPackTo( - OperatorCodeT *_o, - const flatbuffers::resolver_function_t *_resolver) const { +inline void OperatorCode::UnPackTo(OperatorCodeT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = builtin_code(); - _o->builtin_code = _e; - }; - { - auto _e = custom_code(); - if (_e) _o->custom_code = _e->str(); - }; + { auto _e = builtin_code(); _o->builtin_code = _e; }; + { auto _e = custom_code(); if (_e) _o->custom_code = _e->str(); }; } -inline flatbuffers::Offset OperatorCode::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset OperatorCode::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateOperatorCode(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateOperatorCode( - flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateOperatorCode(flatbuffers::FlatBufferBuilder &_fbb, const OperatorCodeT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const OperatorCodeT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OperatorCodeT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _builtin_code = _o->builtin_code; - auto _custom_code = - _o->custom_code.empty() ? 0 : _fbb.CreateString(_o->custom_code); - return tflite::CreateOperatorCode(_fbb, _builtin_code, _custom_code); + auto _custom_code = _o->custom_code.empty() ? 0 : _fbb.CreateString(_o->custom_code); + return tflite::CreateOperatorCode( + _fbb, + _builtin_code, + _custom_code); } -inline OperatorT *Operator::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline OperatorT *Operator::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorT(); UnPackTo(_o, _resolver); return _o; } -inline void Operator::UnPackTo( - OperatorT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void Operator::UnPackTo(OperatorT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = opcode_index(); - _o->opcode_index = _e; - }; - { - auto _e = inputs(); - if (_e) { - _o->inputs.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->inputs[_i] = _e->Get(_i); - } - } - }; - { - auto _e = outputs(); - if (_e) { - _o->outputs.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->outputs[_i] = _e->Get(_i); - } - } - }; - { - auto _e = builtin_options_type(); - _o->builtin_options.type = _e; - }; - { - auto _e = builtin_options(); - if (_e) - _o->builtin_options.value = - BuiltinOptionsUnion::UnPack(_e, builtin_options_type(), _resolver); - }; - { - auto _e = custom_options(); - if (_e) { - _o->custom_options.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->custom_options[_i] = _e->Get(_i); - } - } - }; - { - auto _e = custom_options_format(); - _o->custom_options_format = _e; - }; + { auto _e = opcode_index(); _o->opcode_index = _e; }; + { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = _e->Get(_i); } } }; + { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = _e->Get(_i); } } }; + { auto _e = builtin_options_type(); _o->builtin_options.type = _e; }; + { auto _e = builtin_options(); if (_e) _o->builtin_options.value = BuiltinOptionsUnion::UnPack(_e, builtin_options_type(), _resolver); }; + { auto _e = custom_options(); if (_e) { _o->custom_options.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->custom_options[_i] = _e->Get(_i); } } }; + { auto _e = custom_options_format(); _o->custom_options_format = _e; }; } -inline flatbuffers::Offset Operator::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset Operator::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateOperator(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateOperator( - flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateOperator(flatbuffers::FlatBufferBuilder &_fbb, const OperatorT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const OperatorT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OperatorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _opcode_index = _o->opcode_index; auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0; auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0; auto _builtin_options_type = _o->builtin_options.type; auto _builtin_options = _o->builtin_options.Pack(_fbb); - auto _custom_options = - _o->custom_options.size() ? _fbb.CreateVector(_o->custom_options) : 0; + auto _custom_options = _o->custom_options.size() ? _fbb.CreateVector(_o->custom_options) : 0; auto _custom_options_format = _o->custom_options_format; - return tflite::CreateOperator(_fbb, _opcode_index, _inputs, _outputs, - _builtin_options_type, _builtin_options, - _custom_options, _custom_options_format); + return tflite::CreateOperator( + _fbb, + _opcode_index, + _inputs, + _outputs, + _builtin_options_type, + _builtin_options, + _custom_options, + _custom_options_format); } -inline SubGraphT *SubGraph::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline SubGraphT *SubGraph::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new SubGraphT(); UnPackTo(_o, _resolver); return _o; } -inline void SubGraph::UnPackTo( - SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void SubGraph::UnPackTo(SubGraphT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = tensors(); - if (_e) { - _o->tensors.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->tensors[_i] = - std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); - } - } - }; - { - auto _e = inputs(); - if (_e) { - _o->inputs.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->inputs[_i] = _e->Get(_i); - } - } - }; - { - auto _e = outputs(); - if (_e) { - _o->outputs.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->outputs[_i] = _e->Get(_i); - } - } - }; - { - auto _e = operators(); - if (_e) { - _o->operators.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->operators[_i] = - std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); - } - } - }; - { - auto _e = name(); - if (_e) _o->name = _e->str(); - }; + { auto _e = tensors(); if (_e) { _o->tensors.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->tensors[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = inputs(); if (_e) { _o->inputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->inputs[_i] = _e->Get(_i); } } }; + { auto _e = outputs(); if (_e) { _o->outputs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->outputs[_i] = _e->Get(_i); } } }; + { auto _e = operators(); if (_e) { _o->operators.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->operators[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = name(); if (_e) _o->name = _e->str(); }; } -inline flatbuffers::Offset SubGraph::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset SubGraph::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateSubGraph(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateSubGraph( - flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateSubGraph(flatbuffers::FlatBufferBuilder &_fbb, const SubGraphT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const SubGraphT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; - auto _tensors = - _o->tensors.size() - ? _fbb.CreateVector>( - _o->tensors.size(), - [](size_t i, _VectorArgs *__va) { - return CreateTensor(*__va->__fbb, __va->__o->tensors[i].get(), - __va->__rehasher); - }, - &_va) - : 0; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SubGraphT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _tensors = _o->tensors.size() ? _fbb.CreateVector> (_o->tensors.size(), [](size_t i, _VectorArgs *__va) { return CreateTensor(*__va->__fbb, __va->__o->tensors[i].get(), __va->__rehasher); }, &_va ) : 0; auto _inputs = _o->inputs.size() ? _fbb.CreateVector(_o->inputs) : 0; auto _outputs = _o->outputs.size() ? _fbb.CreateVector(_o->outputs) : 0; - auto _operators = _o->operators.size() - ? _fbb.CreateVector>( - _o->operators.size(), - [](size_t i, _VectorArgs *__va) { - return CreateOperator( - *__va->__fbb, __va->__o->operators[i].get(), - __va->__rehasher); - }, - &_va) - : 0; + auto _operators = _o->operators.size() ? _fbb.CreateVector> (_o->operators.size(), [](size_t i, _VectorArgs *__va) { return CreateOperator(*__va->__fbb, __va->__o->operators[i].get(), __va->__rehasher); }, &_va ) : 0; auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name); - return tflite::CreateSubGraph(_fbb, _tensors, _inputs, _outputs, _operators, - _name); + return tflite::CreateSubGraph( + _fbb, + _tensors, + _inputs, + _outputs, + _operators, + _name); } -inline BufferT *Buffer::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline BufferT *Buffer::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new BufferT(); UnPackTo(_o, _resolver); return _o; } -inline void Buffer::UnPackTo( - BufferT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void Buffer::UnPackTo(BufferT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = data(); - if (_e) { - _o->data.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->data[_i] = _e->Get(_i); - } - } - }; + { auto _e = data(); if (_e) { _o->data.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->data[_i] = _e->Get(_i); } } }; } -inline flatbuffers::Offset Buffer::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset Buffer::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BufferT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateBuffer(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateBuffer( - flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateBuffer(flatbuffers::FlatBufferBuilder &_fbb, const BufferT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const BufferT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BufferT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _data = _o->data.size() ? _fbb.CreateVector(_o->data) : 0; - return tflite::CreateBuffer(_fbb, _data); + return tflite::CreateBuffer( + _fbb, + _data); } -inline ModelT *Model::UnPack( - const flatbuffers::resolver_function_t *_resolver) const { +inline ModelT *Model::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new ModelT(); UnPackTo(_o, _resolver); return _o; } -inline void Model::UnPackTo( - ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const { +inline void Model::UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const { (void)_o; (void)_resolver; - { - auto _e = version(); - _o->version = _e; - }; - { - auto _e = operator_codes(); - if (_e) { - _o->operator_codes.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->operator_codes[_i] = - std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); - } - } - }; - { - auto _e = subgraphs(); - if (_e) { - _o->subgraphs.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->subgraphs[_i] = - std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); - } - } - }; - { - auto _e = description(); - if (_e) _o->description = _e->str(); - }; - { - auto _e = buffers(); - if (_e) { - _o->buffers.resize(_e->size()); - for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { - _o->buffers[_i] = - std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); - } - } - }; + { auto _e = version(); _o->version = _e; }; + { auto _e = operator_codes(); if (_e) { _o->operator_codes.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->operator_codes[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = subgraphs(); if (_e) { _o->subgraphs.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->subgraphs[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; + { auto _e = description(); if (_e) _o->description = _e->str(); }; + { auto _e = buffers(); if (_e) { _o->buffers.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->buffers[_i] = std::unique_ptr(_e->Get(_i)->UnPack(_resolver)); } } }; } -inline flatbuffers::Offset Model::Pack( - flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset Model::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher) { return CreateModel(_fbb, _o, _rehasher); } -inline flatbuffers::Offset CreateModel( - flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, - const flatbuffers::rehasher_function_t *_rehasher) { +inline flatbuffers::Offset CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher) { (void)_rehasher; (void)_o; - struct _VectorArgs { - flatbuffers::FlatBufferBuilder *__fbb; - const ModelT *__o; - const flatbuffers::rehasher_function_t *__rehasher; - } _va = {&_fbb, _o, _rehasher}; - (void)_va; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ModelT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; auto _version = _o->version; - auto _operator_codes = - _o->operator_codes.size() - ? _fbb.CreateVector>( - _o->operator_codes.size(), - [](size_t i, _VectorArgs *__va) { - return CreateOperatorCode(*__va->__fbb, - __va->__o->operator_codes[i].get(), - __va->__rehasher); - }, - &_va) - : 0; - auto _subgraphs = _o->subgraphs.size() - ? _fbb.CreateVector>( - _o->subgraphs.size(), - [](size_t i, _VectorArgs *__va) { - return CreateSubGraph( - *__va->__fbb, __va->__o->subgraphs[i].get(), - __va->__rehasher); - }, - &_va) - : 0; - auto _description = - _o->description.empty() ? 0 : _fbb.CreateString(_o->description); - auto _buffers = - _o->buffers.size() - ? _fbb.CreateVector>( - _o->buffers.size(), - [](size_t i, _VectorArgs *__va) { - return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(), - __va->__rehasher); - }, - &_va) - : 0; - return tflite::CreateModel(_fbb, _version, _operator_codes, _subgraphs, - _description, _buffers); -} - -inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, - const void *obj, BuiltinOptions type) { + auto _operator_codes = _o->operator_codes.size() ? _fbb.CreateVector> (_o->operator_codes.size(), [](size_t i, _VectorArgs *__va) { return CreateOperatorCode(*__va->__fbb, __va->__o->operator_codes[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _subgraphs = _o->subgraphs.size() ? _fbb.CreateVector> (_o->subgraphs.size(), [](size_t i, _VectorArgs *__va) { return CreateSubGraph(*__va->__fbb, __va->__o->subgraphs[i].get(), __va->__rehasher); }, &_va ) : 0; + auto _description = _o->description.empty() ? 0 : _fbb.CreateString(_o->description); + auto _buffers = _o->buffers.size() ? _fbb.CreateVector> (_o->buffers.size(), [](size_t i, _VectorArgs *__va) { return CreateBuffer(*__va->__fbb, __va->__o->buffers[i].get(), __va->__rehasher); }, &_va ) : 0; + return tflite::CreateModel( + _fbb, + _version, + _operator_codes, + _subgraphs, + _description, + _buffers); +} + +inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type) { switch (type) { case BuiltinOptions_NONE: { return true; @@ -6636,8 +5536,7 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, return verifier.VerifyTable(ptr); } case BuiltinOptions_LocalResponseNormalizationOptions: { - auto ptr = - reinterpret_cast(obj); + auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } case BuiltinOptions_LSTMOptions: { @@ -6720,29 +5619,27 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } - default: - return false; + case BuiltinOptions_TopKV2Options: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: return false; } } -inline bool VerifyBuiltinOptionsVector( - flatbuffers::Verifier &verifier, - const flatbuffers::Vector> *values, - const flatbuffers::Vector *types) { +inline bool VerifyBuiltinOptionsVector(flatbuffers::Verifier &verifier, const flatbuffers::Vector> *values, const flatbuffers::Vector *types) { if (!values || !types) return !values && !types; if (values->size() != types->size()) return false; for (flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { - if (!VerifyBuiltinOptions(verifier, values->Get(i), - types->GetEnum(i))) { + if (!VerifyBuiltinOptions( + verifier, values->Get(i), types->GetEnum(i))) { return false; } } return true; } -inline void *BuiltinOptionsUnion::UnPack( - const void *obj, BuiltinOptions type, - const flatbuffers::resolver_function_t *resolver) { +inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, const flatbuffers::resolver_function_t *resolver) { switch (type) { case BuiltinOptions_Conv2DOptions: { auto ptr = reinterpret_cast(obj); @@ -6793,8 +5690,7 @@ inline void *BuiltinOptionsUnion::UnPack( return ptr->UnPack(resolver); } case BuiltinOptions_LocalResponseNormalizationOptions: { - auto ptr = - reinterpret_cast(obj); + auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } case BuiltinOptions_LSTMOptions: { @@ -6877,14 +5773,15 @@ inline void *BuiltinOptionsUnion::UnPack( auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } - default: - return nullptr; + case BuiltinOptions_TopKV2Options: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + default: return nullptr; } } -inline flatbuffers::Offset BuiltinOptionsUnion::Pack( - flatbuffers::FlatBufferBuilder &_fbb, - const flatbuffers::rehasher_function_t *_rehasher) const { +inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBufferBuilder &_fbb, const flatbuffers::rehasher_function_t *_rehasher) const { switch (type) { case BuiltinOptions_Conv2DOptions: { auto ptr = reinterpret_cast(value); @@ -6935,10 +5832,8 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack( return CreateL2NormOptions(_fbb, ptr, _rehasher).Union(); } case BuiltinOptions_LocalResponseNormalizationOptions: { - auto ptr = - reinterpret_cast(value); - return CreateLocalResponseNormalizationOptions(_fbb, ptr, _rehasher) - .Union(); + auto ptr = reinterpret_cast(value); + return CreateLocalResponseNormalizationOptions(_fbb, ptr, _rehasher).Union(); } case BuiltinOptions_LSTMOptions: { auto ptr = reinterpret_cast(value); @@ -7020,32 +5915,30 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack( auto ptr = reinterpret_cast(value); return CreateExpOptions(_fbb, ptr, _rehasher).Union(); } - default: - return 0; + case BuiltinOptions_TopKV2Options: { + auto ptr = reinterpret_cast(value); + return CreateTopKV2Options(_fbb, ptr, _rehasher).Union(); + } + default: return 0; } } -inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) - FLATBUFFERS_NOEXCEPT : type(u.type), - value(nullptr) { +inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FLATBUFFERS_NOEXCEPT : type(u.type), value(nullptr) { switch (type) { case BuiltinOptions_Conv2DOptions: { value = new Conv2DOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_DepthwiseConv2DOptions: { - value = new DepthwiseConv2DOptionsT( - *reinterpret_cast(u.value)); + value = new DepthwiseConv2DOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_ConcatEmbeddingsOptions: { - value = new ConcatEmbeddingsOptionsT( - *reinterpret_cast(u.value)); + value = new ConcatEmbeddingsOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_LSHProjectionOptions: { - value = new LSHProjectionOptionsT( - *reinterpret_cast(u.value)); + value = new LSHProjectionOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_Pool2DOptions: { @@ -7061,18 +5954,15 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) break; } case BuiltinOptions_FullyConnectedOptions: { - value = new FullyConnectedOptionsT( - *reinterpret_cast(u.value)); + value = new FullyConnectedOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_SoftmaxOptions: { - value = - new SoftmaxOptionsT(*reinterpret_cast(u.value)); + value = new SoftmaxOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_ConcatenationOptions: { - value = new ConcatenationOptionsT( - *reinterpret_cast(u.value)); + value = new ConcatenationOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_AddOptions: { @@ -7084,8 +5974,7 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) break; } case BuiltinOptions_LocalResponseNormalizationOptions: { - value = new LocalResponseNormalizationOptionsT( - *reinterpret_cast(u.value)); + value = new LocalResponseNormalizationOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_LSTMOptions: { @@ -7093,8 +5982,7 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) break; } case BuiltinOptions_ResizeBilinearOptions: { - value = new ResizeBilinearOptionsT( - *reinterpret_cast(u.value)); + value = new ResizeBilinearOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_CallOptions: { @@ -7102,23 +5990,19 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) break; } case BuiltinOptions_ReshapeOptions: { - value = - new ReshapeOptionsT(*reinterpret_cast(u.value)); + value = new ReshapeOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_SkipGramOptions: { - value = - new SkipGramOptionsT(*reinterpret_cast(u.value)); + value = new SkipGramOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_SpaceToDepthOptions: { - value = new SpaceToDepthOptionsT( - *reinterpret_cast(u.value)); + value = new SpaceToDepthOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_EmbeddingLookupSparseOptions: { - value = new EmbeddingLookupSparseOptionsT( - *reinterpret_cast(u.value)); + value = new EmbeddingLookupSparseOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_MulOptions: { @@ -7134,18 +6018,15 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) break; } case BuiltinOptions_BatchToSpaceNDOptions: { - value = new BatchToSpaceNDOptionsT( - *reinterpret_cast(u.value)); + value = new BatchToSpaceNDOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_SpaceToBatchNDOptions: { - value = new SpaceToBatchNDOptionsT( - *reinterpret_cast(u.value)); + value = new SpaceToBatchNDOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_TransposeOptions: { - value = new TransposeOptionsT( - *reinterpret_cast(u.value)); + value = new TransposeOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_MeanOptions: { @@ -7161,24 +6042,25 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) break; } case BuiltinOptions_SqueezeOptions: { - value = - new SqueezeOptionsT(*reinterpret_cast(u.value)); + value = new SqueezeOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_SequenceRNNOptions: { - value = new SequenceRNNOptionsT( - *reinterpret_cast(u.value)); + value = new SequenceRNNOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_StridedSliceOptions: { - value = new StridedSliceOptionsT( - *reinterpret_cast(u.value)); + value = new StridedSliceOptionsT(*reinterpret_cast(u.value)); break; } case BuiltinOptions_ExpOptions: { value = new ExpOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_TopKV2Options: { + value = new TopKV2OptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -7351,8 +6233,12 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } - default: + case BuiltinOptions_TopKV2Options: { + auto ptr = reinterpret_cast(value); + delete ptr; break; + } + default: break; } value = nullptr; type = BuiltinOptions_NONE; @@ -7362,25 +6248,33 @@ inline const tflite::Model *GetModel(const void *buf) { return flatbuffers::GetRoot(buf); } -inline const char *ModelIdentifier() { return "TFL3"; } +inline const char *ModelIdentifier() { + return "TFL3"; +} inline bool ModelBufferHasIdentifier(const void *buf) { - return flatbuffers::BufferHasIdentifier(buf, ModelIdentifier()); + return flatbuffers::BufferHasIdentifier( + buf, ModelIdentifier()); } -inline bool VerifyModelBuffer(flatbuffers::Verifier &verifier) { +inline bool VerifyModelBuffer( + flatbuffers::Verifier &verifier) { return verifier.VerifyBuffer(ModelIdentifier()); } -inline const char *ModelExtension() { return "tflite"; } +inline const char *ModelExtension() { + return "tflite"; +} -inline void FinishModelBuffer(flatbuffers::FlatBufferBuilder &fbb, - flatbuffers::Offset root) { +inline void FinishModelBuffer( + flatbuffers::FlatBufferBuilder &fbb, + flatbuffers::Offset root) { fbb.Finish(root, ModelIdentifier()); } inline std::unique_ptr UnPackModel( - const void *buf, const flatbuffers::resolver_function_t *res = nullptr) { + const void *buf, + const flatbuffers::resolver_function_t *res = nullptr) { return std::unique_ptr(GetModel(buf)->UnPack(res)); } diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 87945353cf..d9e269f593 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -49,6 +49,7 @@ gen_zipped_test_files( "squeeze.zip", "strided_slice.zip", "sub.zip", + "topk.zip", "transpose.zip", ], ) diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 1cf3430007..f0b4fcbd52 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -329,6 +329,12 @@ def toco_convert(graph_def_str, input_tensors, output_tensors, return (None if exit_code != 0 else output_file.read()), log +def normalize_output_name(output_name): + """Remove :0 suffix from tensor names.""" + return output_name.split(":")[0] if output_name.endswith( + ":0") else output_name + + def make_zip_of_tests(zip_path, test_parameters, make_graph, @@ -417,8 +423,8 @@ def make_zip_of_tests(zip_path, sess.graph_def.SerializeToString(), [(input_tensor.name.split(":")[0], input_tensor.get_shape(), input_tensor.dtype) for input_tensor in inputs], - [out.name.split(":")[0] - for out in outputs], drop_control_dependency) + [normalize_output_name(out.name) for out in outputs], + drop_control_dependency) report["toco"] = (report_lib.SUCCESS if tflite_model_binary is not None else report_lib.FAILED) report["toco_log"] = toco_log @@ -1705,6 +1711,32 @@ def make_l2_pool(input_tensor, ksize, strides, padding, data_format): padding=padding, data_format=data_format)) +def make_topk_tests(zip_path): + """Make a set of tests to do gather.""" + + test_parameters = [{ + "input_dtype": [tf.float32, tf.int32], + "input_shape": [[10], [5, 20]], + }] + + def build_graph(parameters): + """Build the gather op testing graph.""" + input_value = tf.placeholder( + dtype=parameters["input_dtype"], + name="input", + shape=parameters["input_shape"]) + k = tf.constant(3, name="k") + out = tf.nn.top_k(input_value, k) + return [input_value], [out[1]] + + def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["input_dtype"], + parameters["input_shape"]) + return [input_value], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_value]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + # Toco binary path provided by the generate rule. bin_path = None @@ -1753,6 +1785,7 @@ def main(unused_args): "sigmoid.zip": make_sigmoid_tests, "softmax.zip": make_softmax_tests, "space_to_depth.zip": make_space_to_depth_tests, + "topk.zip": make_topk_tests, "transpose.zip": make_transpose_tests, "mean.zip": make_mean_tests, "squeeze.zip": make_squeeze_tests, diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc index 70d7a9d4a5..7dc36a6d13 100644 --- a/tensorflow/contrib/lite/toco/export_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -1579,6 +1579,17 @@ void ConvertTensorFlowMaximumOperator(const Model& model, (*sub_op->mutable_attr())["T"].set_type(data_type); } +void ConvertTopKV2Operator(const Model& model, const TopKV2Operator& src_op, + GraphDef* tensorflow_graph) { + auto* topk_op = tensorflow_graph->add_node(); + topk_op->set_op("TOPKV2"); + topk_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 2); + *topk_op->add_input() = src_op.inputs[0]; + *topk_op->add_input() = src_op.inputs[1]; + (*topk_op->mutable_attr())["sorted"].set_b(true); +} + void ConvertOperator(const Model& model, const Operator& src_op, GraphDef* tensorflow_graph) { if (src_op.fused_activation_function != FusedActivationFunctionType::kNone) { @@ -1727,6 +1738,9 @@ void ConvertOperator(const Model& model, const Operator& src_op, } else if (src_op.type == OperatorType::kArgMax) { ConvertArgMaxOperator(model, static_cast(src_op), tensorflow_graph); + } else if (src_op.type == OperatorType::kTopK_V2) { + ConvertTopKV2Operator(model, static_cast(src_op), + tensorflow_graph); } else if (src_op.type == OperatorType::kTranspose) { ConvertTransposeOperator( model, static_cast(src_op), tensorflow_graph); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index ddcc03813f..0cf0994b43 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -982,6 +982,43 @@ void ProcessGatherOperator(Model* model, GatherOperator* op) { } } +void ProcessTopkV2Operator(Model* model, TopKV2Operator* op) { + const auto& input_values = model->GetArray(op->inputs[0]); + const auto& input_k = model->GetArray(op->inputs[1]); + auto& output_indexes = model->GetArray(op->outputs[0]); + auto& output_values = model->GetArray(op->outputs[1]); + + // Bail if we already know the output shape. + if (output_indexes.has_shape()) { + QCHECK(output_values.has_shape()); + return; + } + + // Yield until input dims have been resolved. + if (!input_values.has_shape()) { + return; + } + + const auto& input_values_shape = input_values.shape(); + auto output_indexes_dims = output_indexes.mutable_shape()->mutable_dims(); + auto output_values_dims = output_values.mutable_shape()->mutable_dims(); + for (int dim = 0; dim < input_values_shape.dimensions_count() - 1; dim++) { + output_indexes_dims->push_back(input_values_shape.dims(dim)); + output_values_dims->push_back(input_values_shape.dims(dim)); + } + // If the value is initialized, we can specify the last dimension, otherwise + // unknown. + if (input_k.buffer) { + const int32_t k_value = input_k.GetBuffer().data[0]; + output_indexes_dims->push_back(k_value); + output_values_dims->push_back(k_value); + + } else { + output_indexes_dims->push_back(0); + output_values_dims->push_back(0); + } +} + void ProcessPadOperator(Model* model, PadOperator* op) { CHECK_EQ(op->inputs.size(), 2); CHECK_EQ(op->outputs.size(), 1); @@ -1333,7 +1370,9 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { case OperatorType::kGather: ProcessGatherOperator(model, static_cast(op)); break; - + case OperatorType::kTopK_V2: + ProcessTopkV2Operator(model, static_cast(op)); + break; case OperatorType::kAdd: case OperatorType::kSub: case OperatorType::kMul: diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 02c3b2ed9f..330506200c 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -21,6 +21,7 @@ limitations under the License. #include "google/protobuf/map.h" #include "google/protobuf/text_format.h" +#include "absl/memory/memory.h" #include "absl/strings/match.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" @@ -1816,6 +1817,37 @@ bool InlineAllFunctions(GraphDef* graphdef) { } return graph_modified; } + +void ConvertTopKV2Operator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { + CHECK((node.op() == "TopK") || (node.op() == "TopKV2")); + auto op = absl::make_unique(); + op->inputs.push_back(node.input(0)); + // K can be encoded as attr (TopK) convert it to a const. + if (HasAttr(node, "k")) { + // Convert attribute into const tensor. + const string array_name = node.name() + "k"; + auto& array = model->GetOrCreateArray(array_name); + array.data_type = ArrayDataType::kInt32; + // Size of array is always 1. + array.mutable_shape()->mutable_dims()->emplace_back(1); + + auto& output_int_data = + array.GetMutableBuffer().data; + output_int_data.resize(1); + output_int_data[0] = GetIntAttr(node, "k"); + op->inputs.push_back(array_name); + + } else { + CheckInputsCount(node, tf_import_flags, 2); + op->inputs.push_back(node.input(1)); + } + // The op has two outputs. + op->outputs.push_back(node.name() + ":0"); + op->outputs.push_back(node.name() + ":1"); + model->operators.emplace_back(op.release()); +} } // namespace std::unique_ptr ImportTensorFlowGraphDef( @@ -1999,6 +2031,8 @@ std::unique_ptr ImportTensorFlowGraphDef( ConvertArgMaxOperator(node, tf_import_flags, model); } else if (node.op() == "Exp") { ConvertExpOperator(node, tf_import_flags, model); + } else if (node.op() == "TopK" || node.op() == "TopKV2") { + ConvertTopKV2Operator(node, tf_import_flags, model); } else { ConvertUnsupportedOperator(node, tf_import_flags, model); } diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 4c44f3fd66..2bcd6da3da 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -114,6 +114,7 @@ enum class OperatorType { kTensorFlowSwitch, kTensorFlowTile, kTranspose, + kTopK_V2, // An unsupported TF operation. It's only needed to be able to represent TF // graph internally and is expected to be dropped by graph transformations. kTensorFlowUnsupported, @@ -1400,6 +1401,14 @@ struct SvdfOperator : Operator { int rank; }; +// TopKV2 operator. +// +// Inputs: +// input tensor and top_k scalar. +struct TopKV2Operator : Operator { + TopKV2Operator() : Operator(OperatorType::kTopK_V2) {} +}; + // Alloc's are used for transient arrays only. An Alloc specifies which interval // of the "transient_data" workspace buffer passed to inference functions, is to // be used for the transient array at hand. The 'start' and 'end' values are diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index 2583ec0e34..5f2caa5bbb 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -637,6 +637,20 @@ class StridedSlice } }; +class TopK_V2 : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateTopKV2Options(*builder); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override {} +}; + class TensorFlowUnsupported : public BaseOperator { public: using BaseOperator::BaseOperator; @@ -801,6 +815,8 @@ std::vector> BuildOperatorList() { new Squeeze(::tflite::BuiltinOperator_SQUEEZE, OperatorType::kSqueeze)); ops.emplace_back(new StridedSlice(::tflite::BuiltinOperator_STRIDED_SLICE, OperatorType::kStridedSlice)); + ops.emplace_back( + new TopK_V2(::tflite::BuiltinOperator_TOPK_V2, OperatorType::kTopK_V2)); ops.emplace_back( new Lstm(::tflite::BuiltinOperator_LSTM, OperatorType::kLstmCell)); diff --git a/tensorflow/contrib/lite/toco/tflite/operator_test.cc b/tensorflow/contrib/lite/toco/tflite/operator_test.cc index 05c325ef91..5c486f72ad 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator_test.cc @@ -380,6 +380,13 @@ TEST_F(OperatorTest, StridedSlice) { EXPECT_EQ(op.shrink_axis_mask, output_toco_op->shrink_axis_mask); } +TEST_F(OperatorTest, BuiltinTopKV2) { + TopKV2Operator op; + auto output_toco_op = SerializeAndDeserialize( + GetOperator("TOPK_V2", OperatorType::kTopK_V2), op); + ASSERT_NE(nullptr, output_toco_op.get()); +} + TEST_F(OperatorTest, TensorFlowUnsupported) { TensorFlowUnsupportedOperator op; op.tensorflow_op = "MyCustomUnsupportedOp"; diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 627541595b..249c03ca3c 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -312,6 +312,7 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(Mean) HANDLE_OPERATORTYPENAME_CASE(Svdf) HANDLE_OPERATORTYPENAME_CASE(ArgMax) + HANDLE_OPERATORTYPENAME_CASE(TopK_V2) HANDLE_OPERATORTYPENAME_CASE(TensorFlowUnsupported) HANDLE_OPERATORTYPENAME_CASE(Exp) default: -- GitLab From ab57b485a5c1b9246f032b4ba049a9d207206a16 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 11:44:04 -0800 Subject: [PATCH 230/629] Register "Snapshot" op inserted by Grappler arithmetic optimization. For (mostly) pure XLA graphs, this op is identical to "Identity". PiperOrigin-RevId: 185873337 --- tensorflow/compiler/tf2xla/kernels/identity_op.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/tf2xla/kernels/identity_op.cc b/tensorflow/compiler/tf2xla/kernels/identity_op.cc index d2b1f7913e..39af662b63 100644 --- a/tensorflow/compiler/tf2xla/kernels/identity_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/identity_op.cc @@ -40,6 +40,7 @@ REGISTER_XLA_OP(Name("Identity").CompilationOnly(), IdentityOp); REGISTER_XLA_OP(Name("IdentityN").CompilationOnly(), IdentityOp); REGISTER_XLA_OP(Name("PreventGradient"), IdentityOp); REGISTER_XLA_OP(Name("StopGradient"), IdentityOp); +REGISTER_XLA_OP(Name("Snapshot"), IdentityOp); } // namespace } // namespace tensorflow -- GitLab From 4ed183d9d471ca04cf3961610a027136298c1788 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Thu, 15 Feb 2018 12:08:36 -0800 Subject: [PATCH 231/629] [XLA] Fix priority queue in HLO scheduling. The priority of an HLO can change during the scheduling. Use immutable values in priority queue entries, and reinsert an entry if its priority goes up. PiperOrigin-RevId: 185878562 --- .../compiler/xla/service/hlo_scheduling.cc | 84 ++++++++++++++----- 1 file changed, 61 insertions(+), 23 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc index 5f5a930dad..8dc4d4f7ba 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc @@ -101,7 +101,7 @@ class ListScheduler { // LogicalBuffer is in an operand of the instruction as indicated by // points-to analysis. for (auto* instruction : computation.instructions()) { - std::unordered_set instr_uses; + tensorflow::gtl::FlatSet instr_uses; for (auto* operand : instruction->operands()) { for (const LogicalBuffer* buffer : points_to_analysis.GetBuffersDefinedByInstruction(operand)) { @@ -151,10 +151,8 @@ class ListScheduler { int64 bytes_defined; // For each buffer B used by this instruction, we keep a pair (B, U), where - // U is the number of uses of B that have not yet been scheduled. This pair - // is a pointer into the unscheduled_use_count_ map, so it gets updated for - // free when we update counts in the map. - std::vector*> + // U is the number of uses of B that have not yet been scheduled. + std::vector> used_buffer_unscheduled_use_counts; }; @@ -177,8 +175,8 @@ class ListScheduler { } auto unscheduled_use_count_it = unscheduled_use_count_.find(buffer); CHECK(unscheduled_use_count_it != unscheduled_use_count_.end()); - entry.used_buffer_unscheduled_use_counts.push_back( - &*unscheduled_use_count_it); + entry.used_buffer_unscheduled_use_counts.emplace_back( + unscheduled_use_count_it->first, unscheduled_use_count_it->second); } return entry; } @@ -187,8 +185,8 @@ class ListScheduler { int64 BytesFreedIfScheduled(const ReadyListEntry& entry) { int64 freed_bytes = 0; for (const auto& kv : entry.used_buffer_unscheduled_use_counts) { - auto buffer = kv->first; - auto use_count = kv->second; + auto buffer = kv.first; + auto use_count = kv.second; if (use_count == 1) { freed_bytes += size_function_(*buffer); } @@ -206,7 +204,8 @@ class ListScheduler { // Populate the ready list with instructions which have no operands or // control predecessors. - std::unordered_map unscheduled_pred_count; + tensorflow::gtl::FlatMap + unscheduled_pred_count; for (auto* instruction : computation_.instructions()) { // TODO(b/34466113): Replace this and above with successors() or // predecessors() when these methods are added to HloInstruction. @@ -218,33 +217,57 @@ class ListScheduler { } } - auto priority_comparator = [this](const ReadyListEntry& lhs, - const ReadyListEntry& rhs) { - return GetPriority(lhs) < GetPriority(rhs); - }; - std::priority_queue, + auto priority_comparator = + [this](const std::pair& lhs, + const std::pair& rhs) { + return lhs.first < rhs.first; + }; + std::priority_queue, + std::vector>, decltype(priority_comparator)> ready_queue(priority_comparator); + + // Set of instructions in the ready list. + tensorflow::gtl::FlatSet ready_instructions; + + auto add_to_ready_queue = [&](HloInstruction* inst) { + auto entry = MakeReadyListEntry(inst); + ready_queue.emplace(GetPriority(entry), std::move(entry)); + ready_instructions.insert(inst); + }; + for (auto* instruction : computation_.instructions()) { // Instruction with no operands or control predecessors will // not be in the map. if (unscheduled_pred_count.count(instruction) == 0) { - ready_queue.emplace(MakeReadyListEntry(instruction)); + add_to_ready_queue(instruction); } } while (!ready_queue.empty()) { // Remove the selected instruction from the ready list and add it to the // schedule. - const HloInstruction* best = ready_queue.top().instruction; + const HloInstruction* best = ready_queue.top().second.instruction; ready_queue.pop(); + // We may have duplicates in the priority queue, because when a ready + // instruction's priority goes up, we reinsert it to the priority queue. + // Skip the duplicate. + if (scheduled_instructions_.find(best) != scheduled_instructions_.end()) { + continue; + } + ready_instructions.erase(best); schedule.push_back(best); scheduled_instructions_.insert(best); + bool adjust_ready_queue = false; // Update the unscheduled uses of the logical buffers. for (const LogicalBuffer* buffer : buffer_uses_.at(best)) { - CHECK_GT(unscheduled_use_count_.at(buffer), 0); - --unscheduled_use_count_[buffer]; + int64& count = unscheduled_use_count_[buffer]; + CHECK_GT(count, 0); + --count; + if (count == 1) { + adjust_ready_queue = true; + } } // Add new instructions to ready list. @@ -252,7 +275,7 @@ class ListScheduler { int64 pred_count = --unscheduled_pred_count.at(inst); CHECK_GE(pred_count, 0); if (pred_count == 0) { - ready_queue.emplace(MakeReadyListEntry(inst)); + add_to_ready_queue(inst); } }; // TODO(b/34466113): Replace this and above with successors() or @@ -263,6 +286,20 @@ class ListScheduler { for (HloInstruction* succ : best->control_successors()) { update_pred_count(succ); } + // The unscheduled use count for a buffer has changed to 1, so the + // priorities of some ready instructions may go up. We reinsert them to + // the priority queue, so that they can appear earlier. The old entries + // will become duplicates and will be skipped. + if (adjust_ready_queue) { + for (HloInstruction* operand : best->operands()) { + for (HloInstruction* operand_user : operand->users()) { + if (ready_instructions.find(operand_user) != + ready_instructions.end()) { + add_to_ready_queue(operand_user); + } + } + } + } } CHECK_EQ(schedule.size(), computation_.instruction_count()); CHECK_EQ(scheduled_instructions_.size(), computation_.instruction_count()); @@ -275,15 +312,16 @@ class ListScheduler { const LogicalBuffer::SizeFunction& size_function_; // A map containing the LogicalBuffers that each instruction uses. - std::unordered_map> + tensorflow::gtl::FlatMap> buffer_uses_; // A map containing the count of unscheduled HLOs which using a particular // LogicalBuffer. We rely on iterator stability in this map. - std::unordered_map unscheduled_use_count_; + tensorflow::gtl::FlatMap unscheduled_use_count_; // Set of instructions which have been scheduled. - std::unordered_set scheduled_instructions_; + tensorflow::gtl::FlatSet scheduled_instructions_; }; int64 SumLogicalBufferSizes( -- GitLab From 4b297b5434438175b016da05421e7ddd46c0f8ee Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 15 Feb 2018 12:39:52 -0800 Subject: [PATCH 232/629] Register kernels for Assign and AssignVariableOp on GPU for integer types. PiperOrigin-RevId: 185882834 --- .../kernels/dense_update_functor_gpu.cu.cc | 1 + tensorflow/core/kernels/dense_update_ops.cc | 2 ++ .../core/kernels/resource_variable_ops.cc | 3 +++ tensorflow/core/kernels/variable_ops.cc | 1 + .../python/kernel_tests/array_ops_test.py | 9 ++++--- .../resource_variable_ops_test.py | 24 ++++++++++++------- 6 files changed, 27 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc index c9c97dc072..9a3b2303a3 100644 --- a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc @@ -57,6 +57,7 @@ struct DenseUpdate { template struct functor::DenseUpdate; \ template struct functor::DenseUpdate; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); +TF_CALL_int64(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS #define DEFINE_GPU_KERNELS(T) \ diff --git a/tensorflow/core/kernels/dense_update_ops.cc b/tensorflow/core/kernels/dense_update_ops.cc index 6497c8f371..0de97de205 100644 --- a/tensorflow/core/kernels/dense_update_ops.cc +++ b/tensorflow/core/kernels/dense_update_ops.cc @@ -109,6 +109,7 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS); AssignOpT); TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA @@ -142,6 +143,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); Name("AssignSub").Device(DEVICE_GPU).TypeConstraint("T"), \ DenseUpdateOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // end GOOGLE_CUDA diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index 5b4aad3cdd..702fb89aac 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -130,6 +130,7 @@ REGISTER_KERNEL_BUILDER( ResourceHandleOp) TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); TF_CALL_variant(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA @@ -398,6 +399,7 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS); AssignVariableOp); TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); TF_CALL_variant(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA @@ -456,6 +458,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); AssignUpdateVariableOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc index 10ccc85b7c..7fd5809ca4 100644 --- a/tensorflow/core/kernels/variable_ops.cc +++ b/tensorflow/core/kernels/variable_ops.cc @@ -237,6 +237,7 @@ TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL); IsVariableInitializedOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 1e2ea82988..365cf72108 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -498,7 +498,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase): def test_basic_slice(self): for tensor_type in STRIDED_SLICE_TYPES: - with self.test_session(use_gpu=True): + with self.test_session(use_gpu=not tensor_type.is_integer): checker = StridedSliceChecker( self, StridedSliceChecker.REF_TENSOR, tensor_type=tensor_type) _ = checker[:, :, :] @@ -884,7 +884,8 @@ class StridedSliceAssignChecker(object): if self.tensor_type.is_complex: value -= 1j * value - with self.test.test_session(use_gpu=True) as sess: + with self.test.test_session( + use_gpu=not self.tensor_type.is_integer) as sess: if self._use_resource: var = resource_variable_ops.ResourceVariable(self.x) else: @@ -974,9 +975,7 @@ class SliceAssignTest(test_util.TensorFlowTestCase): errors.InvalidArgumentError, "l-value dtype int32 does not match r-value dtype int64"): sess.run(v[:].assign(too_large_val)) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - "l-value dtype int32 does not match r-value dtype int8"): + with self.assertRaises(errors.InvalidArgumentError): sess.run(v[:].assign(too_small_val)) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index dc6e73bd5b..8503f3e031 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -64,6 +64,13 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): 0, dtype=dtypes.int32)).run() + def testGPUInt64(self): + if not context.context().num_gpus(): + return + with context.eager_mode(), context.device("gpu:0"): + v = resource_variable_ops.ResourceVariable(1, dtype=dtypes.int64) + self.assertAllEqual(1, v.numpy()) + def testEagerNameNotIdentity(self): with context.eager_mode(): v0 = resource_variable_ops.ResourceVariable(1.0, name="a") @@ -162,14 +169,15 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes(use_gpu=True) def testScatterAdd(self): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate(resource_variable_ops.assign_variable_op( - handle, constant_op.constant([[1]], dtype=dtypes.int32))) - self.evaluate(resource_variable_ops.resource_scatter_add( - handle, [0], constant_op.constant([[2]], dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[3]]) + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + self.evaluate(resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[1]], dtype=dtypes.int32))) + self.evaluate(resource_variable_ops.resource_scatter_add( + handle, [0], constant_op.constant([[2]], dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[3]]) def testScatterUpdateString(self): handle = resource_variable_ops.var_handle_op( -- GitLab From 972fa89023f8f27948321c388fa3f1f7857833c3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 12:50:03 -0800 Subject: [PATCH 233/629] Add auc_with_confidence_intervals This method computes the AUC and corresponding confidence intervals using an efficient algorithm. PiperOrigin-RevId: 185884228 --- tensorflow/contrib/metrics/BUILD | 1 + tensorflow/contrib/metrics/__init__.py | 2 + .../contrib/metrics/python/ops/metric_ops.py | 291 ++++++++++++++++++ .../metrics/python/ops/metric_ops_test.py | 199 ++++++++++++ 4 files changed, 493 insertions(+) diff --git a/tensorflow/contrib/metrics/BUILD b/tensorflow/contrib/metrics/BUILD index 9de664c822..e90c525113 100644 --- a/tensorflow/contrib/metrics/BUILD +++ b/tensorflow/contrib/metrics/BUILD @@ -43,6 +43,7 @@ py_library( "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python:weights_broadcast_ops", + "//tensorflow/python/ops/distributions", ], ) diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py index d3dce46bfb..de02dc8f45 100644 --- a/tensorflow/contrib/metrics/__init__.py +++ b/tensorflow/contrib/metrics/__init__.py @@ -16,6 +16,7 @@ See the @{$python/contrib.metrics} guide. +@@auc_with_confidence_intervals @@streaming_accuracy @@streaming_mean @@streaming_recall @@ -83,6 +84,7 @@ from tensorflow.contrib.metrics.python.ops.confusion_matrix_ops import confusion from tensorflow.contrib.metrics.python.ops.histogram_ops import auc_using_histogram from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metric_map from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metrics +from tensorflow.contrib.metrics.python.ops.metric_ops import auc_with_confidence_intervals from tensorflow.contrib.metrics.python.ops.metric_ops import cohen_kappa from tensorflow.contrib.metrics.python.ops.metric_ops import count from tensorflow.contrib.metrics.python.ops.metric_ops import precision_recall_at_equal_thresholds diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 55946c128b..fc12bfd2b7 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -38,6 +38,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import weights_broadcast_ops +from tensorflow.python.ops.distributions.normal import Normal from tensorflow.python.util.deprecation import deprecated # Epsilon constant used to represent extremely small quantity. @@ -1196,6 +1197,295 @@ def streaming_dynamic_auc(labels, return auc, update_op +def _compute_placement_auc(labels, predictions, weights, alpha, + logit_transformation, is_valid): + """Computes the AUC and asymptotic normally distributed confidence interval. + + The calculations are achieved using the fact that AUC = P(Y_1>Y_0) and the + concept of placement values for each labeled group, as presented by Delong and + Delong (1988). The actual algorithm used is a more computationally efficient + approach presented by Sun and Xu (2014). This could be slow for large batches, + but has the advantage of not having its results degrade depending on the + distribution of predictions. + + Args: + labels: A `Tensor` of ground truth labels with the same shape as + `predictions` with values of 0 or 1 and type `int64`. + predictions: A 1-D `Tensor` of predictions whose values are `float64`. + weights: `Tensor` whose rank is either 0, or the same rank as `labels`. + alpha: Confidence interval level desired. + logit_transformation: A boolean value indicating whether the estimate should + be logit transformed prior to calculating the confidence interval. Doing + so enforces the restriction that the AUC should never be outside the + interval [0,1]. + is_valid: A bool tensor describing whether the input is valid. + + Returns: + A 1-D `Tensor` containing the area-under-curve, lower, and upper confidence + interval values. + """ + # Disable the invalid-name checker so that we can capitalize the name. + # pylint: disable=invalid-name + AucData = collections_lib.namedtuple('AucData', ['auc', 'lower', 'upper']) + # pylint: enable=invalid-name + + # If all the labels are the same or if number of observations are too few, + # AUC isn't well-defined + size = array_ops.size(predictions, out_type=dtypes.int32) + + # Count the total number of positive and negative labels in the input. + total_0 = math_ops.reduce_sum( + math_ops.cast(1 - labels, weights.dtype) * weights) + total_1 = math_ops.reduce_sum( + math_ops.cast(labels, weights.dtype) * weights) + + # Sort the predictions ascending, as well as + # (i) the corresponding labels and + # (ii) the corresponding weights. + ordered_predictions, indices = nn.top_k(predictions, k=size, sorted=True) + ordered_predictions = array_ops.reverse( + ordered_predictions, axis=array_ops.zeros(1, dtypes.int32)) + indices = array_ops.reverse(indices, axis=array_ops.zeros(1, dtypes.int32)) + ordered_labels = array_ops.gather(labels, indices) + ordered_weights = array_ops.gather(weights, indices) + + # We now compute values required for computing placement values. + + # We generate a list of indices (segmented_indices) of increasing order. An + # index is assigned for each unique prediction float value. Prediction + # values that are the same share the same index. + _, segmented_indices = array_ops.unique(ordered_predictions) + + # We create 2 tensors of weights. weights_for_true is non-zero for true + # labels. weights_for_false is non-zero for false labels. + float_labels_for_true = math_ops.cast(ordered_labels, dtypes.float32) + float_labels_for_false = 1.0 - float_labels_for_true + weights_for_true = ordered_weights * float_labels_for_true + weights_for_false = ordered_weights * float_labels_for_false + + # For each set of weights with the same segmented indices, we add up the + # weight values. Note that for each label, we deliberately rely on weights + # for the opposite label. + weight_totals_for_true = math_ops.segment_sum(weights_for_false, + segmented_indices) + weight_totals_for_false = math_ops.segment_sum(weights_for_true, + segmented_indices) + + # These cumulative sums of weights importantly exclude the current weight + # sums. + cum_weight_totals_for_true = math_ops.cumsum(weight_totals_for_true, + exclusive=True) + cum_weight_totals_for_false = math_ops.cumsum(weight_totals_for_false, + exclusive=True) + + # Compute placement values using the formula. Values with the same segmented + # indices and labels share the same placement values. + placements_for_true = ( + (cum_weight_totals_for_true + weight_totals_for_true / 2.0) / + (math_ops.reduce_sum(weight_totals_for_true) + _EPSILON)) + placements_for_false = ( + (cum_weight_totals_for_false + weight_totals_for_false / 2.0) / + (math_ops.reduce_sum(weight_totals_for_false) + _EPSILON)) + + # We expand the tensors of placement values (for each label) so that their + # shapes match that of predictions. + placements_for_true = array_ops.gather(placements_for_true, segmented_indices) + placements_for_false = array_ops.gather(placements_for_false, + segmented_indices) + + # Select placement values based on the label for each index. + placement_values = ( + placements_for_true * float_labels_for_true + + placements_for_false * float_labels_for_false) + + # Split placement values by labeled groups. + placement_values_0 = placement_values * math_ops.cast( + 1 - ordered_labels, weights.dtype) + weights_0 = ordered_weights * math_ops.cast( + 1 - ordered_labels, weights.dtype) + placement_values_1 = placement_values * math_ops.cast( + ordered_labels, weights.dtype) + weights_1 = ordered_weights * math_ops.cast( + ordered_labels, weights.dtype) + + # Calculate AUC using placement values + auc_0 = (math_ops.reduce_sum(weights_0 * (1. - placement_values_0)) / + (total_0 + _EPSILON)) + auc_1 = (math_ops.reduce_sum(weights_1 * (placement_values_1)) / + (total_1 + _EPSILON)) + auc = array_ops.where(math_ops.less(total_0, total_1), auc_1, auc_0) + + # Calculate variance and standard error using the placement values. + var_0 = ( + math_ops.reduce_sum( + weights_0 * math_ops.square(1. - placement_values_0 - auc_0)) / + (total_0 - 1. + _EPSILON)) + var_1 = ( + math_ops.reduce_sum( + weights_1 * math_ops.square(placement_values_1 - auc_1)) / + (total_1 - 1. + _EPSILON)) + auc_std_err = math_ops.sqrt( + (var_0 / (total_0 + _EPSILON)) + (var_1 / (total_1 + _EPSILON))) + + # Calculate asymptotic normal confidence intervals + std_norm_dist = Normal(loc=0., scale=1.) + z_value = std_norm_dist.quantile((1.0 - alpha) / 2.0) + if logit_transformation: + estimate = math_ops.log(auc / (1. - auc + _EPSILON)) + std_err = auc_std_err / (auc * (1. - auc + _EPSILON)) + transformed_auc_lower = estimate + (z_value * std_err) + transformed_auc_upper = estimate - (z_value * std_err) + def inverse_logit_transformation(x): + exp_negative = math_ops.exp(math_ops.negative(x)) + return 1. / (1. + exp_negative + _EPSILON) + + auc_lower = inverse_logit_transformation(transformed_auc_lower) + auc_upper = inverse_logit_transformation(transformed_auc_upper) + else: + estimate = auc + std_err = auc_std_err + auc_lower = estimate + (z_value * std_err) + auc_upper = estimate - (z_value * std_err) + + ## If estimate is 1 or 0, no variance is present so CI = 1 + ## n.b. This can be misleading, since number obs can just be too low. + lower = array_ops.where( + math_ops.logical_or( + math_ops.equal(auc, array_ops.ones_like(auc)), + math_ops.equal(auc, array_ops.zeros_like(auc))), + auc, auc_lower) + upper = array_ops.where( + math_ops.logical_or( + math_ops.equal(auc, array_ops.ones_like(auc)), + math_ops.equal(auc, array_ops.zeros_like(auc))), + auc, auc_upper) + + # If all the labels are the same, AUC isn't well-defined (but raising an + # exception seems excessive) so we return 0, otherwise we finish computing. + trivial_value = array_ops.constant(0.0) + + return AucData(*control_flow_ops.cond( + is_valid, lambda: [auc, lower, upper], lambda: [trivial_value]*3)) + + +def auc_with_confidence_intervals(labels, + predictions, + weights=None, + alpha=0.95, + logit_transformation=True, + metrics_collections=(), + updates_collections=(), + name=None): + """Computes the AUC and asymptotic normally distributed confidence interval. + + USAGE NOTE: this approach requires storing all of the predictions and labels + for a single evaluation in memory, so it may not be usable when the evaluation + batch size and/or the number of evaluation steps is very large. + + Computes the area under the ROC curve and its confidence interval using + placement values. This has the advantage of being resilient to the + distribution of predictions by aggregating across batches, accumulating labels + and predictions and performing the final calculation using all of the + concatenated values. + + Args: + labels: A `Tensor` of ground truth labels with the same shape as `labels` + and with values of 0 or 1 whose values are castable to `int64`. + predictions: A `Tensor` of predictions whose values are castable to + `float64`. Will be flattened into a 1-D `Tensor`. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`. + alpha: Confidence interval level desired. + logit_transformation: A boolean value indicating whether the estimate should + be logit transformed prior to calculating the confidence interval. Doing + so enforces the restriction that the AUC should never be outside the + interval [0,1]. + metrics_collections: An optional iterable of collections that `auc` should + be added to. + updates_collections: An optional iterable of collections that `update_op` + should be added to. + name: An optional name for the variable_scope that contains the metric + variables. + + Returns: + auc: A 1-D `Tensor` containing the current area-under-curve, lower, and + upper confidence interval values. + update_op: An operation that concatenates the input labels and predictions + to the accumulated values. + + Raises: + ValueError: If `labels`, `predictions`, and `weights` have mismatched shapes + or if `alpha` isn't in the range (0,1). + """ + if not (alpha > 0 and alpha < 1): + raise ValueError('alpha must be between 0 and 1; currently %.02f' % alpha) + + if weights is None: + weights = array_ops.ones_like(predictions) + + with variable_scope.variable_scope( + name, + default_name='auc_with_confidence_intervals', + values=[labels, predictions, weights]): + + predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access + predictions=predictions, + labels=labels, + weights=weights) + + total_weight = math_ops.reduce_sum(weights) + + weights = array_ops.reshape(weights, [-1]) + predictions = array_ops.reshape( + math_ops.cast(predictions, dtypes.float64), [-1]) + labels = array_ops.reshape(math_ops.cast(labels, dtypes.int64), [-1]) + + with ops.control_dependencies([ + check_ops.assert_greater_equal( + labels, + array_ops.zeros_like(labels, dtypes.int64), + message='labels must be 0 or 1, at least one is <0'), + check_ops.assert_less_equal( + labels, + array_ops.ones_like(labels, dtypes.int64), + message='labels must be 0 or 1, at least one is >1'), + ]): + preds_accum, update_preds = streaming_concat( + predictions, name='concat_preds') + labels_accum, update_labels = streaming_concat(labels, + name='concat_labels') + weights_accum, update_weights = streaming_concat( + weights, name='concat_weights') + update_op_for_valid_case = control_flow_ops.group( + update_labels, update_preds, update_weights) + + # Only perform updates if this case is valid. + all_labels_positive_or_0 = math_ops.logical_and( + math_ops.equal(math_ops.reduce_min(labels), 0), + math_ops.equal(math_ops.reduce_max(labels), 1)) + sums_of_weights_at_least_1 = math_ops.greater_equal(total_weight, 1.0) + is_valid = math_ops.logical_and(all_labels_positive_or_0, + sums_of_weights_at_least_1) + + update_op = control_flow_ops.cond( + sums_of_weights_at_least_1, + lambda: update_op_for_valid_case, control_flow_ops.no_op) + + auc = _compute_placement_auc( + labels_accum, + preds_accum, + weights_accum, + alpha=alpha, + logit_transformation=logit_transformation, + is_valid=is_valid) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + if metrics_collections: + ops.add_to_collections(metrics_collections, auc) + return auc, update_op + + def precision_recall_at_equal_thresholds(labels, predictions, weights=None, @@ -3430,6 +3720,7 @@ def cohen_kappa(labels, __all__ = [ + 'auc_with_confidence_intervals', 'aggregate_metric_map', 'aggregate_metrics', 'cohen_kappa', diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index b4e365d10f..b387f26c01 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -2128,6 +2128,205 @@ class StreamingDynamicAUCTest(test.TestCase): self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-5) +class AucWithConfidenceIntervalsTest(test.TestCase): + + def setUp(self): + np.random.seed(1) + ops.reset_default_graph() + + def _testResultsEqual(self, expected_dict, gotten_result): + """Tests that 2 results (dicts) represent the same data. + + Args: + expected_dict: A dictionary with keys that are the names of properties + of PrecisionRecallData and whose values are lists of floats. + gotten_result: A AucWithConfidenceIntervalData object. + """ + gotten_dict = {k: t.eval() for k, t in gotten_result._asdict().items()} + self.assertItemsEqual( + list(expected_dict.keys()), list(gotten_dict.keys())) + + for key, expected_values in expected_dict.items(): + self.assertAllClose(expected_values, gotten_dict[key]) + + def _testCase(self, predictions, labels, expected_result, weights=None): + """Performs a test given a certain scenario of labels, predictions, weights. + + Args: + predictions: The predictions tensor. Of type float32. + labels: The labels tensor. Of type bool. + expected_result: The expected result (dict) that maps to tensors. + weights: Optional weights tensor. + """ + with self.test_session() as sess: + predictions_tensor = constant_op.constant( + predictions, dtype=dtypes_lib.float32) + labels_tensor = constant_op.constant(labels, dtype=dtypes_lib.int64) + weights_tensor = None + if weights: + weights_tensor = constant_op.constant(weights, dtype=dtypes_lib.float32) + gotten_result, update_op = ( + metric_ops.auc_with_confidence_intervals( + labels=labels_tensor, + predictions=predictions_tensor, + weights=weights_tensor)) + + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + + self._testResultsEqual(expected_result, gotten_result) + + def testAucAllCorrect(self): + self._testCase( + predictions=[0., 0.2, 0.3, 0.3, 0.4, 0.5, 0.6, 0.6, 0.8, 1.0], + labels=[0, 0, 1, 0, 0, 1, 0, 1, 1, 0], + expected_result={ + 'auc': 0.66666667, + 'lower': 0.27826795, + 'upper': 0.91208512, + }) + + def testAucUnorderedInput(self): + self._testCase( + predictions=[1.0, 0.6, 0., 0.3, 0.4, 0.2, 0.5, 0.3, 0.6, 0.8], + labels=[0, 1, 0, 1, 0, 0, 1, 0, 0, 1], + expected_result={ + 'auc': 0.66666667, + 'lower': 0.27826795, + 'upper': 0.91208512, + }) + + def testAucWithWeights(self): + self._testCase( + predictions=[0., 0.2, 0.3, 0.3, 0.4, 0.5, 0.6, 0.6, 0.8, 1.0], + labels=[0, 0, 1, 0, 0, 1, 0, 1, 1, 0], + weights=[0.5, 0.6, 1.2, 1.5, 2.0, 2.0, 1.5, 1.2, 0.6, 0.5], + expected_result={ + 'auc': 0.65151515, + 'lower': 0.28918604, + 'upper': 0.89573906, + }) + + def testAucEqualOne(self): + self._testCase( + predictions=[0, 0.2, 0.3, 0.3, 0.4, 0.5, 0.6, 0.6, 0.8, 1.0], + labels=[0, 0, 0, 0, 0, 1, 1, 1, 1, 1], + expected_result={ + 'auc': 1.0, + 'lower': 1.0, + 'upper': 1.0, + }) + + def testAucEqualZero(self): + self._testCase( + predictions=[0, 0.2, 0.3, 0.3, 0.4, 0.5, 0.6, 0.6, 0.8, 1.0], + labels=[1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + expected_result={ + 'auc': 0.0, + 'lower': 0.0, + 'upper': 0.0, + }) + + def testNonZeroOnePredictions(self): + self._testCase( + predictions=[2.5, -2.5, .5, -.5, 1], + labels=[1, 0, 1, 0, 0], + expected_result={ + 'auc': 0.83333333, + 'lower': 0.15229267, + 'upper': 0.99286517, + }) + + def testAllLabelsOnes(self): + self._testCase( + predictions=[1., 1., 1., 1., 1.], + labels=[1, 1, 1, 1, 1], + expected_result={ + 'auc': 0., + 'lower': 0., + 'upper': 0., + }) + + def testAllLabelsZeros(self): + self._testCase( + predictions=[0., 0., 0., 0., 0.], + labels=[0, 0, 0, 0, 0], + expected_result={ + 'auc': 0., + 'lower': 0., + 'upper': 0., + }) + + def testWeightSumLessThanOneAll(self): + self._testCase( + predictions=[1., 1., 0., 1., 0., 0.], + labels=[1, 1, 1, 0, 0, 0], + weights=[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], + expected_result={ + 'auc': 0., + 'lower': 0., + 'upper': 0., + }) + + def testWithMultipleUpdates(self): + batch_size = 50 + num_batches = 100 + labels = np.array([]) + predictions = np.array([]) + tf_labels = variables.Variable(array_ops.ones(batch_size, dtypes_lib.int32), + collections=[ops.GraphKeys.LOCAL_VARIABLES], + dtype=dtypes_lib.int32) + tf_predictions = variables.Variable( + array_ops.ones(batch_size), + collections=[ops.GraphKeys.LOCAL_VARIABLES], + dtype=dtypes_lib.float32) + auc, update_op = metrics.auc_with_confidence_intervals(tf_labels, + tf_predictions) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + for _ in xrange(num_batches): + new_labels = np.random.randint(0, 2, size=batch_size) + noise = np.random.normal(0.0, scale=0.2, size=batch_size) + new_predictions = 0.4 + 0.2 * new_labels + noise + labels = np.concatenate([labels, new_labels]) + predictions = np.concatenate([predictions, new_predictions]) + sess.run(tf_labels.assign(new_labels)) + sess.run(tf_predictions.assign(new_predictions)) + sess.run(update_op) + expected_auc = _np_auc(predictions, labels) + self.assertAllClose(expected_auc, auc.auc.eval()) + + def testExceptionOnFloatLabels(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0.5, 0, 1, 0], dtypes_lib.float32) + labels = constant_op.constant([0.7, 0, 1, 0, 1]) + _, update_op = metrics.auc_with_confidence_intervals(labels, predictions) + sess.run(variables.local_variables_initializer()) + self.assertRaises(TypeError, sess.run(update_op)) + + def testExceptionOnGreaterThanOneLabel(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0.5, 0, 1, 0], dtypes_lib.float32) + labels = constant_op.constant([2, 1, 0, 1, 0]) + _, update_op = metrics.auc_with_confidence_intervals(labels, predictions) + sess.run(variables.local_variables_initializer()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + '.*labels must be 0 or 1, at least one is >1.*'): + sess.run(update_op) + + def testExceptionOnNegativeLabel(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0.5, 0, 1, 0], dtypes_lib.float32) + labels = constant_op.constant([1, 0, -1, 1, 0]) + _, update_op = metrics.auc_with_confidence_intervals(labels, predictions) + sess.run(variables.local_variables_initializer()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + '.*labels must be 0 or 1, at least one is <0.*'): + sess.run(update_op) + + class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase): def setUp(self): -- GitLab From d81104a09de68a06e4b607cf8761f1e3affea829 Mon Sep 17 00:00:00 2001 From: Alina Sbirlea Date: Thu, 15 Feb 2018 13:35:19 -0800 Subject: [PATCH 234/629] Optimize dot(DynamicSlice(ConstA), ConstantB) by memoizing dot(ConstA, ConstB) Make transformation when ConstA and ConstB are 2D, and DynamicSlice is slicing a full row, column respectively. Handle: dot(DynamicSlice(Index, ConstA), ConstB) => DynamicSlice(Index, dot*(ConstA, ConstB)); and dot(ConstA, DynamicSlice(Index, ConstB)) => DynamicSlice(Index, dot*(ConstA, ConstB)); PiperOrigin-RevId: 185891869 --- .../xla/service/algebraic_simplifier.cc | 141 ++++++++++ .../xla/service/algebraic_simplifier_test.cc | 203 +++++++++++++++ .../compiler/xla/tests/dot_operation_test.cc | 246 ++++++++++++++++++ 3 files changed, 590 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index fb857559f9..6f6c2391f3 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -284,6 +284,8 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { const Shape& dot_shape, HloInstruction* lhs, int64 lhs_contracting_dim, HloInstruction* rhs, int64 rhs_contracting_dim, bool swapped); + StatusOr OptimizeDotOfGather(HloInstruction* dot); + // Current HloComputation instance the AlgebraicSimplifierVisitor is // traversing. HloComputation* computation_; @@ -917,6 +919,134 @@ StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfConcatHelper( return add_result; } +StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfGather( + HloInstruction* dot) { + const DotDimensionNumbers& dnums = dot->dot_dimension_numbers(); + if (dnums.lhs_contracting_dimensions_size() != 1 || + dnums.rhs_contracting_dimensions_size() != 1 || + dnums.lhs_batch_dimensions_size() != 0 || + dnums.rhs_batch_dimensions_size() != 0 || + dot->shape().dimensions_size() != 2) { // dot output 2D + VLOG(10) << "DotOfGather: Can only optimize 2D, non-batch dot operations."; + return nullptr; + } + + // Optimize either dot(DS(ctA), ctB)) or dot(ctB, DS(ctA)). + // Currently a Gather is a DynamicSlice. + auto is_dynamic_slice_constant_combination = + [](HloInstruction* a, HloInstruction* b, int a_contracting_dimension) { + // First operand is a DynamicSlice(Constant). + if (a->opcode() != HloOpcode::kDynamicSlice) { + return false; + } + auto* dynamic_slice_op = a->operand(0); + if (dynamic_slice_op->opcode() != HloOpcode::kConstant) { + return false; + } + // Second operand is a Constant. + if (b->opcode() != HloOpcode::kConstant) { + return false; + } + // The DynamicSlice output is a vector. + const Shape& dynamic_slice_shape = a->shape(); + if (dynamic_slice_shape.dimensions(1 - a_contracting_dimension) != 1) { + return false; + } + // Constant size is the same before and after slice in the contracting + // dimension, otherwise we either must precompute for all possible slice + // indices or dot is invalid. + const Shape& dynamic_slice_op_shape = dynamic_slice_op->shape(); + if (dynamic_slice_op_shape.dimensions(a_contracting_dimension) != + dynamic_slice_shape.dimensions(a_contracting_dimension)) { + return false; + } + return true; + }; + + HloInstruction* lhs = dot->mutable_operand(0); + HloInstruction* rhs = dot->mutable_operand(1); + int lhs_contracting_dimension = dnums.lhs_contracting_dimensions(0); + int rhs_contracting_dimension = dnums.rhs_contracting_dimensions(0); + + if (!is_dynamic_slice_constant_combination( + lhs, rhs, /*a_contracting_dimension=*/lhs_contracting_dimension) && + !is_dynamic_slice_constant_combination( + rhs, lhs, /*a_contracting_dimension=*/rhs_contracting_dimension)) { + VLOG(10) << "DotOfGather: Can only optimize dot(DS(ctA), ctB)) or " + "dot(ctB, DS(ctA)), where the two constants have equal " + "contracting dimensions."; + return nullptr; + } + + // LHS is DynamicSlice: + // input: dot(DS(ctA), ctB)) + // where DS(ctA) = DS({M x K}, {start, 0}, {1, K}) and ctB = {K x N}. + // => input dimensions: dot({1 x K}, {K x N}) => {1 x N}. + // output: DS(dot(ctA, ctB)) + // => output dimensions: DS ({M x N}, {start, 0}, {1, N}) => {1 x N}. + + // RHS is DynamicSlice: + // input: dot(ctA, DS(ctB)) + // where ctA = {M x K} and DS(ctB) = DS({K x N}, {0, start}, {K, 1}). + // => input dimensions: dot({M x K}, {K x 1}) => {M x 1}. + // output: DS(dot(ctA, ctB)) + // => output dimensions: DS ({M x N}, {0, start}, {M, 1}) => {M x 1}. + + bool lhs_is_dynamic_slice = lhs->opcode() == HloOpcode::kDynamicSlice; + + // ctA: + HloInstruction* left_operand = + lhs_is_dynamic_slice ? lhs->mutable_operand(0) : lhs; + // ctB: + HloInstruction* right_operand = + lhs_is_dynamic_slice ? rhs : rhs->mutable_operand(0); + // Build ctA x ctB. + const int m = left_operand->shape().dimensions(1 - lhs_contracting_dimension); + const int n = + right_operand->shape().dimensions(1 - rhs_contracting_dimension); + auto memoized_shape = ShapeUtil::MakeShape(F32, {m, n}); + auto* memoized_inst = computation_->AddInstruction(HloInstruction::CreateDot( + memoized_shape, left_operand, right_operand, dnums)); + // Get pair {start, 0} or {0, start}. + HloInstruction* original_start_indices = + lhs_is_dynamic_slice ? lhs->mutable_operand(1) : rhs->mutable_operand(1); + // Position of start: + int index_of_non_zero_start = lhs_is_dynamic_slice + ? 1 - lhs_contracting_dimension + : 1 - rhs_contracting_dimension; + // Position of zero: + int index_of_zero_start = 1 - index_of_non_zero_start; + + // Slice out start and 0 components and reorder if necessary. + auto indices_type = original_start_indices->shape().element_type(); + Shape s_shape = ShapeUtil::MakeShape(indices_type, {1}); + Shape d_shape = ShapeUtil::MakeShape(indices_type, {2}); + HloInstruction* non_zero_start = + computation_->AddInstruction(HloInstruction::CreateSlice( + s_shape, original_start_indices, {index_of_non_zero_start}, + {index_of_non_zero_start + 1}, {1})); + HloInstruction* zero_start = + computation_->AddInstruction(HloInstruction::CreateSlice( + s_shape, original_start_indices, {index_of_zero_start}, + {index_of_zero_start + 1}, {1})); + HloInstruction* new_start_indices = + lhs_is_dynamic_slice + ? computation_->AddInstruction(HloInstruction::CreateConcatenate( + d_shape, {non_zero_start, zero_start}, 0)) + : computation_->AddInstruction(HloInstruction::CreateConcatenate( + d_shape, {zero_start, non_zero_start}, 0)); + + // Build DynamicSlice(ctA x ctB). + const int new_slice_m = lhs_is_dynamic_slice ? 1 : m; + const int new_slice_n = lhs_is_dynamic_slice ? n : 1; + auto* memoized_lookup = + computation_->AddInstruction(HloInstruction::CreateDynamicSlice( + dot->shape(), memoized_inst, new_start_indices, + {new_slice_m, new_slice_n})); + + return memoized_lookup; +} + Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { auto lhs = dot->mutable_operand(0); auto rhs = dot->mutable_operand(1); @@ -946,6 +1076,17 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { return ReplaceInstruction(dot, dot_of_concat_optimized); } + // Simplify dot(ConstA, Gather(Index, ConstB)) to: + // Gather(Index, dot*(ConstA, ConstB)), where dot* is an appropriately + // batched version of dot. + TF_ASSIGN_OR_RETURN(HloInstruction * dot_of_gather_optimized, + OptimizeDotOfGather(dot)); + if (dot_of_gather_optimized) { + VLOG(10) << "Replaced dot(constA, gather(i, constB)) with " + "gather(i, dot*(constA, constB))"; + return ReplaceInstruction(dot, dot_of_gather_optimized); + } + if (enable_dot_strength_reduction_ && !is_layout_sensitive_) { TF_ASSIGN_OR_RETURN(bool did_strength_reduction, HandleDotStrengthReduction(dot)); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 0f08eb3a32..fc78420147 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2772,5 +2772,208 @@ DotOfConcatTestSpec kDotOfConcatTestSpecs[] = { INSTANTIATE_TEST_CASE_P(DotOfConcatSimplificationTestInstantiation, DotOfConcatSimplificationTest, ::testing::ValuesIn(kDotOfConcatTestSpecs)); + +struct DotOfGatherTestSpec { + int64 m; + int64 k; + int64 n; + int s; // start index for dynamic slice on the non-contracting dimension + int64 lcd; // left contracting dimension + int64 rcd; // right contracting dimension + bool neg; // is negative testcase +}; + +class DotOfGatherSimplificationTest + : public HloVerifiedTestBase, + public ::testing::WithParamInterface {}; + +// input: dot(DS(ctA), ctB)) +// where DS(ctA) = DS({M x K}, {s, 0}, {1, K}) and ctB = {K x N}. +// => input dimensions: dot({1 x K}, {K x N}) => {1 x N}. +// output: DS(dot(ctA, ctB)) +// => output dimensions: DS ({M x N}, {s, 0}, {1, N}) => {1 x N}. +TEST_P(DotOfGatherSimplificationTest, ConstantRHS) { + HloComputation::Builder builder(TestName()); + + DotOfGatherTestSpec spec = GetParam(); + + ASSERT_LE(spec.s, spec.m); + + // For negative tests, increase k of the dynamic slice argument to prevent the + // optimization (constants ctA, ctB must have equal contracting dimensions). + int64 k_increase = spec.neg ? 5 : 0; + int64 lhs_rows = (spec.lcd == 0) ? (spec.k + k_increase) : spec.m; + int64 lhs_cols = (spec.lcd == 0) ? spec.m : (spec.k + k_increase); + Shape lhs_shape = ShapeUtil::MakeShape(F32, {lhs_rows, lhs_cols}); + auto* lhs = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( + /*from=*/10.0, /*to=*/10000.0, /*rows=*/lhs_rows, + /*cols=*/lhs_cols))); + + int32 start_row = (spec.lcd == 0) ? 0 : spec.s; + int32 start_col = (spec.lcd == 0) ? spec.s : 0; + const auto start_indices = + builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR1({start_row, start_col}))); + int64 slice_row_size = (spec.lcd == 0) ? spec.k : 1; + int64 slice_col_size = (spec.lcd == 0) ? 1 : spec.k; + Shape ds_shape = ShapeUtil::MakeShape(F32, {slice_row_size, slice_col_size}); + auto* ds = builder.AddInstruction(HloInstruction::CreateDynamicSlice( + ds_shape, lhs, start_indices, {slice_row_size, slice_col_size})); + + int64 rhs_rows = (spec.rcd == 0) ? spec.k : spec.n; + int64 rhs_cols = (spec.rcd == 0) ? spec.n : spec.k; + Shape rhs_shape = ShapeUtil::MakeShape(F32, {rhs_rows, rhs_cols}); + auto* rhs = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( + /*from=*/10.0, /*to=*/10000.0, /*rows=*/rhs_rows, + /*cols=*/rhs_cols))); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(spec.lcd); + dot_dnums.add_rhs_contracting_dimensions(spec.rcd); + + int64 dot_row_size = 1; + int64 dot_col_size = spec.n; + Shape dot_shape = ShapeUtil::MakeShape(F32, {dot_row_size, dot_col_size}); + builder.AddInstruction( + HloInstruction::CreateDot(dot_shape, ds, rhs, dot_dnums)); + + auto computation = module().AddEntryComputation(builder.Build()); + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(&module())); + ASSERT_TRUE(run_successful); + EXPECT_TRUE( + ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape)); + + if (spec.neg) { + EXPECT_NE(computation->root_instruction()->opcode(), + HloOpcode::kDynamicSlice); + } else { + EXPECT_THAT(computation->root_instruction(), + op::DynamicSlice(op::Dot(op::Constant(), op::Constant()), + op::Concatenate())); + } +} + +// input: dot(ctA, DS(ctB)) +// where ctA = {M x K} and DS(ctB) = DS({K x N}, {0, s}, {K, 1}). +// => input dimensions: dot({M x K}, {K x 1}) => {M x 1}. +// output: DS(dot(ctA, ctB)) +// => output dimensions: DS ({M x N}, {0, s}, {M, 1}) => {M x 1}. +TEST_P(DotOfGatherSimplificationTest, ConstantLHS) { + HloComputation::Builder builder(TestName()); + + DotOfGatherTestSpec spec = GetParam(); + + ASSERT_LE(spec.s, spec.n); + + int64 lhs_rows = (spec.lcd == 0) ? spec.k : spec.m; + int64 lhs_cols = (spec.lcd == 0) ? spec.m : spec.k; + Shape lhs_shape = ShapeUtil::MakeShape(F32, {lhs_rows, lhs_cols}); + auto* lhs = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( + /*from=*/10.0, /*to=*/10000.0, /*rows=*/lhs_rows, + /*cols=*/lhs_cols))); + + // For negative tests increase k of the dynamic slice argument to prevent the + // optimization + int64 k_increase = spec.neg ? 5 : 0; + int64 rhs_rows = (spec.rcd == 0) ? (spec.k + k_increase) : spec.n; + int64 rhs_cols = (spec.rcd == 0) ? spec.n : (spec.k + k_increase); + Shape rhs_shape = ShapeUtil::MakeShape(F32, {rhs_rows, rhs_cols}); + auto* rhs = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( + /*from=*/10.0, /*to=*/10000.0, /*rows=*/rhs_rows, + /*cols=*/rhs_cols))); + + int32 start_row = (spec.rcd == 0) ? 0 : spec.s; + int32 start_col = (spec.rcd == 0) ? spec.s : 0; + const auto start_indices = + builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR1({start_row, start_col}))); + int64 slice_row_size = (spec.rcd == 0) ? spec.k : 1; + int64 slice_col_size = (spec.rcd == 0) ? 1 : spec.k; + Shape ds_shape = ShapeUtil::MakeShape(F32, {slice_row_size, slice_col_size}); + auto* ds = builder.AddInstruction(HloInstruction::CreateDynamicSlice( + ds_shape, rhs, start_indices, {slice_row_size, slice_col_size})); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(spec.lcd); + dot_dnums.add_rhs_contracting_dimensions(spec.rcd); + + int64 dot_row_size = spec.m; + int64 dot_col_size = 1; + Shape dot_shape = ShapeUtil::MakeShape(F32, {dot_row_size, dot_col_size}); + builder.AddInstruction( + HloInstruction::CreateDot(dot_shape, lhs, ds, dot_dnums)); + + auto computation = module().AddEntryComputation(builder.Build()); + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(&module())); + ASSERT_TRUE(run_successful); + EXPECT_TRUE( + ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape)); + + if (spec.neg) { + EXPECT_NE(computation->root_instruction()->opcode(), + HloOpcode::kDynamicSlice); + } else { + EXPECT_THAT(computation->root_instruction(), + op::DynamicSlice(op::Dot(op::Constant(), op::Constant()), + op::Concatenate())); + } +} + +std::vector DotOfGatherPositiveNegativeTests() { + std::vector positives = { + // "Classical dot", i.e. matrix multiply: + {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/1, /*rcd=*/0, + /*neg=*/false}, + {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/1, /*rcd=*/0, + /*neg=*/false}, + {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/1, /*rcd=*/0, + /*neg=*/false}, + // Note: testing for m=1 and n=1 is unnecessary, as this optimizes to + // dot(ct, ct) before DotOfGather optimization kicks in. + // Contract on rows: + {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/0, /*rcd=*/0, + /*neg=*/false}, + {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/0, /*rcd=*/0, + /*neg=*/false}, + {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/0, /*rcd=*/0, + /*neg=*/false}, + // Reverse matrix multiply: + {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/0, /*rcd=*/1, + /*neg=*/false}, + {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/0, /*rcd=*/1, + /*neg=*/false}, + {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/0, /*rcd=*/1, + /*neg=*/false}, + // Contract on columns: + {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/1, /*rcd=*/1, + /*neg=*/false}, + {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/1, /*rcd=*/1, + /*neg=*/false}, + {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/1, /*rcd=*/1, + /*neg=*/false}, + }; + std::vector all; + for (int i = 0; i < positives.size(); i++) { + DotOfGatherTestSpec positive_test = positives[i]; + all.push_back(positive_test); + DotOfGatherTestSpec negative_test = positive_test; + negative_test.neg = true; + all.push_back(negative_test); + } + return all; +} + +INSTANTIATE_TEST_CASE_P( + DotOfGatherSimplificationTestInstantiation, DotOfGatherSimplificationTest, + ::testing::ValuesIn(DotOfGatherPositiveNegativeTests())); + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 6b0c04c2c0..63354d4b30 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -703,5 +703,251 @@ TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstRHS) { &builder, expected, {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_); } + +TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 105, 96}, {96, 105, 114}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{96.0, 105.0, 114.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 105, 96}, {96, 105, 114}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{105.0}, {105.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. +TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( + DotOfGatherOptimizationWithConstRHSReverseMM)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 96}, {105, 105}, {96, 114}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{105.0, 105.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. +TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( + DotOfGatherOptimizationWithConstLHSReverseMM)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 96}, {105, 105}, {96, 114}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{96.0}, {105.0}, {114.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. +TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSRows)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0}, + {3.0, 4.0}, + {5.0, 6.0}, + {6.0, 5.0}, + {4.0, 3.0}, + {2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{132, 129, 126}, {126, 129, 132}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{126.0, 129.0, 132.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. +TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSRows)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0}, + {3.0, 4.0}, + {5.0, 6.0}, + {6.0, 5.0}, + {4.0, 3.0}, + {2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{132, 129, 126}, {126, 129, 132}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{129.0}, {129.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. +TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSCols)))) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0, 9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{91, 168, 56}, {56, 168, 91}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{56.0, 168.0, 91.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. +TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSCols)))) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0, 9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{91, 168, 56}, {56, 168, 91}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{168.0}, {168.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} } // namespace } // namespace xla -- GitLab From 82d67d0af2ed13bdf003e69486f3f477961ef407 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 15 Feb 2018 13:40:10 -0800 Subject: [PATCH 235/629] Wrap XlaOpRegistry::DeviceKernels call to call in python. PiperOrigin-RevId: 185892888 --- tensorflow/compiler/tf2xla/python/BUILD | 21 +++++++++++++++++++ .../tf2xla/python/xla_op_registry.clif | 7 +++++++ tensorflow/compiler/tf2xla/xla_op_registry.cc | 2 ++ tensorflow/core/BUILD | 7 +++++++ 4 files changed, 37 insertions(+) create mode 100644 tensorflow/compiler/tf2xla/python/BUILD create mode 100644 tensorflow/compiler/tf2xla/python/xla_op_registry.clif diff --git a/tensorflow/compiler/tf2xla/python/BUILD b/tensorflow/compiler/tf2xla/python/BUILD new file mode 100644 index 0000000000..49bde78039 --- /dev/null +++ b/tensorflow/compiler/tf2xla/python/BUILD @@ -0,0 +1,21 @@ +licenses(["notice"]) # Apache 2.0 + +package( + default_visibility = ["//tensorflow:internal"], +) + +load( + "//tensorflow/core:platform/default/build_config.bzl", + "tf_py_clif_cc", +) + +tf_py_clif_cc( + name = "xla_op_registry", + srcs = ["xla_op_registry.clif"], + pyclif_deps = [ + "//tensorflow/core:framework/kernel_def_pyclif", + ], + deps = [ + "//tensorflow/compiler/tf2xla:xla_compiler", + ], +) diff --git a/tensorflow/compiler/tf2xla/python/xla_op_registry.clif b/tensorflow/compiler/tf2xla/python/xla_op_registry.clif new file mode 100644 index 0000000000..e1ee6cc656 --- /dev/null +++ b/tensorflow/compiler/tf2xla/python/xla_op_registry.clif @@ -0,0 +1,7 @@ +from "third_party/tensorflow/core/framework/kernel_def_pyclif.h" import * # KernelDef + +from "third_party/tensorflow/compiler/tf2xla/xla_op_registry.h": + namespace `tensorflow`: + def `XlaOpRegistry::DeviceKernels` as + device_kernels(device: str, include_compilation_only_kernels: bool) -> + list diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.cc b/tensorflow/compiler/tf2xla/xla_op_registry.cc index 0dde6a986c..bbe808595d 100644 --- a/tensorflow/compiler/tf2xla/xla_op_registry.cc +++ b/tensorflow/compiler/tf2xla/xla_op_registry.cc @@ -255,6 +255,8 @@ void XlaOpRegistry::RegisterCompilationKernels() { std::vector XlaOpRegistry::DeviceKernels( const string& compilation_device_name, bool include_compilation_only_kernels) { + // Ensure compilation kernels registered. + RegisterCompilationKernels(); std::vector kernels; XlaOpRegistry& registry = Instance(); mutex_lock lock(registry.mutex_); diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 8eb5c11969..30ac270109 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1335,6 +1335,13 @@ tf_pyclif_proto_library( visibility = ["//visibility:public"], ) +tf_pyclif_proto_library( + name = "framework/kernel_def_pyclif", + proto_lib = ":protos_all_cc", + proto_srcfile = "framework/kernel_def.proto", + visibility = ["//visibility:public"], +) + tf_pyclif_proto_library( name = "framework/node_def_pyclif", proto_lib = ":protos_all_cc", -- GitLab From 8745e3426713068e7061b3aae368ebb4db8dc2cc Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 15 Feb 2018 13:43:47 -0800 Subject: [PATCH 236/629] Object-based saving: Switch to "everything is Checkpointable" The only sane way to use/test this is to have Variables be Checkpointable, so this CL includes a move of the base class to core. No public methods are exposed, and I've attempted to not throw any errors on __setattr__. Allows dynamic dependencies (track after restore) and restoring variables on assignment to a Checkpointable object, and includes the protocol buffer modifications necessary for saving information with each object. There are still some prominent TODOs: - Stop modifying the graph after the first save/restore (likely cache ops in Checkpointable objects) - Add some overridable methods for saving Python strings when restore() is called, fed when graph building rather than embedded as constants in the graph - Work on the initialization story for graph building. Currently the unit tests rely on collections for this. - Support for more objects, move the prototype modifications in checkpointable_test to core. The diff is larger than I was hoping (mostly deletions and unit tests); that could be reduced a bit (or at least "lines added" converted to "lines deleted") by diffbasing on cl/180950921, which was my first attempt at dynamic dependencies. This CL is more of a re-write than a modification, so sending that one out seems a bit silly. The unit tests are still good, though. PiperOrigin-RevId: 185893387 --- .../proto/checkpointable_object_graph.proto | 44 +- tensorflow/contrib/eager/python/BUILD | 13 +- .../contrib/eager/python/checkpointable.py | 773 ---------------- .../eager/python/checkpointable_test.py | 497 ---------- .../eager/python/checkpointable_utils.py | 413 +++++++++ .../eager/python/checkpointable_utils_test.py | 857 ++++++++++++++++++ tensorflow/python/BUILD | 25 + .../python/ops/resource_variable_ops.py | 6 + tensorflow/python/ops/variables.py | 22 +- tensorflow/python/training/checkpointable.py | 584 ++++++++++++ .../python/training/checkpointable_test.py | 39 + tensorflow/python/training/optimizer.py | 47 +- .../api/golden/tensorflow.-variable.pbtxt | 1 + ...tensorflow.train.-adadelta-optimizer.pbtxt | 1 + ...sorflow.train.-adagrad-d-a-optimizer.pbtxt | 1 + .../tensorflow.train.-adagrad-optimizer.pbtxt | 1 + .../tensorflow.train.-adam-optimizer.pbtxt | 1 + .../tensorflow.train.-ftrl-optimizer.pbtxt | 1 + ...ow.train.-gradient-descent-optimizer.pbtxt | 1 + ...tensorflow.train.-momentum-optimizer.pbtxt | 1 + .../golden/tensorflow.train.-optimizer.pbtxt | 1 + ...ow.train.-proximal-adagrad-optimizer.pbtxt | 1 + ...-proximal-gradient-descent-optimizer.pbtxt | 1 + ...nsorflow.train.-r-m-s-prop-optimizer.pbtxt | 1 + ...rflow.train.-sync-replicas-optimizer.pbtxt | 1 + tensorflow/tools/pip_package/BUILD | 2 +- 26 files changed, 2033 insertions(+), 1302 deletions(-) delete mode 100644 tensorflow/contrib/eager/python/checkpointable.py delete mode 100644 tensorflow/contrib/eager/python/checkpointable_test.py create mode 100644 tensorflow/contrib/eager/python/checkpointable_utils.py create mode 100644 tensorflow/contrib/eager/python/checkpointable_utils_test.py create mode 100644 tensorflow/python/training/checkpointable.py create mode 100644 tensorflow/python/training/checkpointable_test.py diff --git a/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto b/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto index 4f71aec96a..024765acb2 100644 --- a/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto +++ b/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto @@ -4,9 +4,9 @@ option cc_enable_arenas = true; package tensorflow.contrib.eager; -// Prototype for an addition to BundleHeaderProto which saves extra information -// about the objects which own variables, allowing for more robust checkpoint -// loading into modified programs. +// Prototype format which saves extra information about the objects which own +// variables, allowing for more robust checkpoint loading into modified +// programs. Currently stored in its own entry in a TensorBundle. message CheckpointableObjectGraph { message Object { @@ -18,37 +18,35 @@ message CheckpointableObjectGraph { string local_name = 2; } - message VariableReference { - // A name for the variable which is unique within the object which owns - // it. Does not include a name_scope or variable_scope prefix. - string local_name = 1; - // The full name of the variable. Used to allow name-based loading of - // checkpoints which were saved using an object-based API. + message SerializedTensor { + // A name for the Tensor. Simple variables have only one + // `SerializedTensor` named "VARIABLE_VALUE" by convention. This value may + // be restored on object creation as an optimization. + string name = 1; + // The full name of the variable/tensor, if applicable. Used to allow + // name-based loading of checkpoints which were saved using an + // object-based API. Should match the checkpoint key which would have been + // assigned by tf.train.Saver. string full_name = 2; - // The generated name of the variable in the checkpoint. + // The generated name of the Tensor in the checkpoint. string checkpoint_key = 3; } message SlotVariableReference { - // An index into `CheckpointableObjectGraph.nodes`, indicating the object - // which created the variable that this variable is slotting for. + // An index into `CheckpointableObjectGraph.nodes`, indicating the + // variable object this slot was created for. int32 original_variable_node_id = 1; - // The local name of the variable being slotted for within the object that - // owns it. - string original_variable_local_name = 2; // The name of the slot (e.g. "m"/"v"). - string slot_name = 3; - // The full name of the slot variable. Used to allow name-based loading of - // checkpoints which were saved using an object-based API. - string full_name = 4; - // The generated name of the variable in the checkpoint. - string checkpoint_key = 5; + string slot_name = 2; + // An index into `CheckpointableObjectGraph.nodes`, indicating the + // `Object` with the value of the slot variable. + int32 slot_variable_node_id = 3; } // Objects which this object depends on. repeated ObjectReference children = 1; - // Non-slot variables owned by this object. - repeated VariableReference variables = 2; + // Serialized data specific to this object. + repeated SerializedTensor attributes = 2; // Slot variables owned by this object. repeated SlotVariableReference slot_variables = 3; } diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index cfb38a1d26..ad40e55cb4 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -220,18 +220,19 @@ py_test( ) py_library( - name = "checkpointable", - srcs = ["checkpointable.py"], + name = "checkpointable_utils", + srcs = ["checkpointable_utils.py"], srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/contrib/eager/proto:checkpointable_object_graph_proto_py", + "//tensorflow/python:constant_op", + "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:init_ops", "//tensorflow/python:io_ops", "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:state_ops", "//tensorflow/python:tensor_shape", "//tensorflow/python:training", "//tensorflow/python:variable_scope", @@ -240,11 +241,11 @@ py_library( ) py_test( - name = "checkpointable_test", - srcs = ["checkpointable_test.py"], + name = "checkpointable_utils_test", + srcs = ["checkpointable_utils_test.py"], srcs_version = "PY2AND3", deps = [ - ":checkpointable", + ":checkpointable_utils", ":network", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", diff --git a/tensorflow/contrib/eager/python/checkpointable.py b/tensorflow/contrib/eager/python/checkpointable.py deleted file mode 100644 index 896b38a734..0000000000 --- a/tensorflow/contrib/eager/python/checkpointable.py +++ /dev/null @@ -1,773 +0,0 @@ -"""An object-local variable management scheme.""" -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import re -import weakref - -from tensorflow.contrib.eager.proto import checkpointable_object_graph_pb2 -from tensorflow.python.eager import context -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import io_ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.training import optimizer as optimizer_lib -from tensorflow.python.training import saver as saver_lib -from tensorflow.python.training import slot_creator -from tensorflow.python.training import training - -_CheckpointableReference = collections.namedtuple( - "_CheckpointableReference", - [ - # The local name if explicitly specified, else None. - "name", - # The Checkpointable object being referenced. - "ref" - ]) - -# Validation regular expression for the local names of Checkpointable -# objects. In particular, disallows "/" in names, and reserves dash-prefixed -# names (which are not valid Python identifiers, so we're not restricting the -# __setattr__ syntax that way). -_VALID_LOCAL_NAME = re.compile(r"^[A-Za-z0-9_.][A-Za-z0-9_.-]*$") - -# Keyword for identifying that the next bit of a checkpoint variable name is a -# slot name. May not be the local name of a checkpointable. Checkpoint names for -# slot variables look like: -# -# /<_OPTIMIZER_SLOTS_NAME>// -# -# Where is a full path from the checkpoint root to the -# variable being slotted for. -_OPTIMIZER_SLOTS_NAME = "-OPTIMIZER_SLOT" - - -def _assign_existing_variable(variable_to_restore, value_pointer): - """Set a variable from a _ValuePointer object.""" - base_type = variable_to_restore.dtype.base_dtype - with ops.colocate_with(variable_to_restore): - # TODO(allenl): Handle partitioned variables - value_to_restore, = io_ops.restore_v2( - prefix=value_pointer.save_path, - tensor_names=[value_pointer.checkpoint_key], - shape_and_slices=[""], - dtypes=[base_type], - name="checkpoint_initializer") - initializer_op = state_ops.assign(variable_to_restore, value_to_restore) - variable_to_restore._initializer_op = initializer_op # pylint:disable=protected-access - if value_pointer.session is not None: - value_pointer.session.run(initializer_op) - - -def _default_getter(name, shape, dtype, initializer=None, - partition_info=None, **kwargs): - """A pared-down version of get_variable which does not reuse variables.""" - dtype = dtypes.as_dtype(dtype) - shape_object = tensor_shape.as_shape(shape) - with ops.init_scope(): - if initializer is None: - initializer, initializing_from_value = ( - variable_scope._get_default_variable_store()._get_default_initializer( # pylint: disable=protected-access - name=name, shape=shape_object, dtype=dtype)) - else: - initializing_from_value = not callable(initializer) - # Same logic as get_variable - if initializing_from_value: - if shape is not None: - raise ValueError("If initializer is a constant, do not specify shape.") - initial_value = initializer - variable_dtype = None - else: - # Instantiate initializer if provided initializer is a type object. - if isinstance(initializer, type(init_ops.Initializer)): - initializer = initializer(dtype=dtype) - def initial_value(): - return initializer( - shape_object.as_list(), dtype=dtype, partition_info=partition_info) - variable_dtype = dtype.base_dtype - return resource_variable_ops.ResourceVariable( - initial_value=initial_value, - name=name, - dtype=variable_dtype, - **kwargs - ) - - -class Checkpointable(object): - """Manages variables and dependencies on other objects. - - To make reliable checkpoints, all `Checkpointable`s on which this object - depends must be registered in the constructor using `track_checkpointable` in - a deterministic order, and if possible they should be named. Variables may be - created using `add_variable` outside of the constructor and in any order, but - only these variables will be saved. - """ - - def __init__(self): - # A list of _CheckpointableReference objects. - self._checkpoint_dependencies = [] - # Maps names -> Checkpointable objects for named dependencies - self._dependency_names = {} - # Set of all tracked Checkpointables - self._already_tracked = set() - self._owned_variables = {} # local name -> variable object - self._deferred_restorations = {} # local name -> _VariableRestoration - # object - - def __setattr__(self, name, value): - """Support self.foo = checkpointable syntax. - - `self.foo = checkpointable` is equivalent to - `self.foo = self.track_checkpointable(checkpointable, name='foo')`. - - No new tracking if `value` is not a `Checkpointable`, or if `value` is - already being tracked (either because of an explicit `track_checkpointable` - or a previous `__setattr__`). - - Args: - name: The name of the property being set. - value: The new value for the property. - """ - # Give child classes (e.g. Network) priority, then track only if the object - # hasn't been added to _already_tracked. - super(Checkpointable, self).__setattr__(name, value) - if (isinstance(value, Checkpointable) - and value not in self._already_tracked): - self.track_checkpointable(value, name=name) - - def add_variable(self, name, shape=None, dtype=dtypes.float32, - initializer=None, **kwargs): - """Create a new variable object to be saved with this `Checkpointable`. - - If the user has requested that this object or another `Checkpointable` which - depends on this object be restored from a checkpoint (deferred loading - before variable object creation), `initializer` may be ignored and the value - from the checkpoint used instead. - - Args: - name: A name for the variable. Must be unique within this object. - shape: The shape of the variable. - dtype: The data type of the variable. - initializer: The initializer to use. Ignored if deferred loading has been - requested. - **kwargs: Passed to the ResourceVariable constructor. - - Returns: - The new variable object. - - Raises: - ValueError: If the variable name is not unique. - RuntimeError: If __init__ has not been called. - """ - if not hasattr(self, "_owned_variables"): - raise RuntimeError("Need to call Checkpointable.__init__ before adding " - "variables.") - if name in self._owned_variables: - raise ValueError( - ("A variable named '%s' already exists in this Checkpointable, but " - "Checkpointable.add_variable called to create another with " - "that name. Variable names must be unique within a Checkpointable " - "object.") % (name,)) - if "getter" in kwargs: - # Allow the getter to be overridden, typically because there is a need for - # compatibility with some other variable creation mechanism. This should - # be relatively uncommon in user code. - getter = kwargs.pop("getter") - else: - getter = _default_getter - deferred_restoration = self._deferred_restorations.pop(name, None) - if deferred_restoration is not None: - dtype = deferred_restoration.value_pointer.dtype - base_type = dtype.base_dtype - # TODO(allenl): Handle partitioned variables here too - with ops.init_scope(): - initializer, = io_ops.restore_v2( - prefix=deferred_restoration.value_pointer.save_path, - tensor_names=[deferred_restoration.value_pointer.checkpoint_key], - shape_and_slices=[""], - dtypes=[base_type], - name="checkpoint_initializer") - # We need to un-set the shape so get_variable doesn't complain, but we - # also need to set the static shape information on the initializer if - # possible so we don't get a variable with an unknown shape. - initializer.set_shape(shape) - # Un-set shape since we're using a constant initializer - shape = None - - new_variable = getter( - name=name, shape=shape, dtype=dtype, initializer=initializer, **kwargs) - if deferred_restoration is not None: - if deferred_restoration.value_pointer.session is not None: - deferred_restoration.value_pointer.session.run(new_variable.initializer) - for slot_restoration in deferred_restoration.slot_restorations: - strong_ref = slot_restoration.optimizer_ref() - if strong_ref is None: - # If the optimizer object has been garbage collected, there's no need - # to create the slot variable. - continue - strong_ref._process_slot_restoration( # pylint: disable=protected-access - slot_restoration, new_variable) - self._owned_variables[name] = new_variable - return new_variable - - def track_checkpointable(self, checkpointable, name): - """Declare a dependency on another `Checkpointable` object. - - Indicates that checkpoints for this object should include variables from - `checkpointable`. - - Variables in a checkpoint are mapped to `Checkpointable`s based on names. To - avoid breaking existing checkpoints when modifying a class, neither variable - names nor dependency names (the names passed to `track_checkpointable`) may - change. - - Args: - checkpointable: A `Checkpointable` which this object depends on. - name: A local name for `checkpointable`, used for loading checkpoints into - the correct objects. Python 2 identifiers are valid names, with the - addition of leading numerals, periods anywhere, and non-leading dashes. - Specifically names must match the regular expression - `^[A-Za-z0-9_.][A-Za-z0-9_.-]*$`. - - Returns: - `checkpointable`, for convenience when declaring a dependency and - assigning to a member variable in one statement. - - Raises: - RuntimeError: If __init__ was not called. - TypeError: If `checkpointable` does not inherit from `Checkpointable`. - ValueError: For invalid names. - """ - if not hasattr(self, "_checkpoint_dependencies"): - raise RuntimeError("Need to call Checkpointable.__init__ before calling " - "Checkpointable.track_checkpointable().") - if not isinstance(checkpointable, Checkpointable): - raise TypeError( - ("Checkpointable.track_checkpointable() passed type %s, not a " - "Checkpointable.") % (type(checkpointable),)) - if not _VALID_LOCAL_NAME.match(name): - raise ValueError( - ("Checkpointable names must match the regular expression '%s', but " - "got an invalid name '%s' instead.") % (_VALID_LOCAL_NAME.pattern, - name)) - if (name in self._dependency_names - and self._dependency_names[name] is not checkpointable): - raise ValueError( - ("Called Checkpointable.track_checkpointable() with name='%s', but " - "a Checkpointable with this name is already declared as a " - "dependency. Names must be unique.") % (name,)) - self._dependency_names[name] = checkpointable - self._checkpoint_dependencies.append( - _CheckpointableReference(name=name, ref=checkpointable)) - self._already_tracked.add(checkpointable) - return checkpointable - - def _process_restoration(self, restoration): - """Restore a variable and its slot variables (may be deferred).""" - variable_to_restore = self._owned_variables.get(restoration.name, None) - if variable_to_restore is not None: - # This variable already exists, so just do an assignment for this and any - # slot variables which depend on it. - _assign_existing_variable( - variable_to_restore, value_pointer=restoration.value_pointer) - for slot_restoration in restoration.slot_restorations: - strong_ref = slot_restoration.optimizer_ref() - if strong_ref is None: - continue - strong_ref._process_slot_restoration( # pylint: disable=protected-access - slot_restoration, variable_to_restore) - else: - # Save this restoration for later. This intentionally overwrites any - # previous deferred restorations, since that gives the same semantics as - # direct assignment. - self._deferred_restorations[restoration.name] = restoration - - def _process_slot_restoration(self, slot_restoration, variable): - """Restore a slot variable's value (creating it if necessary).""" - # TODO(allenl): Move this to Optimizer - assert isinstance(self, optimizer_lib.Optimizer) - named_slots = self._slot_dict(slot_restoration.slot_name) - variable_key = optimizer_lib._var_key(variable) # pylint: disable=protected-access - existing_slot_variable = named_slots.get(variable_key, None) - if existing_slot_variable is None: - base_dtype = slot_restoration.value_pointer.dtype.base_dtype - initializer, = io_ops.restore_v2( - prefix=slot_restoration.value_pointer.save_path, - tensor_names=[slot_restoration.value_pointer.checkpoint_key], - shape_and_slices=[""], - dtypes=[base_dtype], - name="checkpoint_initializer") - new_slot_variable = slot_creator.create_slot(variable, initializer, - slot_restoration.slot_name) - if slot_restoration.value_pointer.session is not None: - slot_restoration.value_pointer.session.run( - new_slot_variable.initializer) - named_slots[variable_key] = new_slot_variable - else: - _assign_existing_variable( - existing_slot_variable, value_pointer=slot_restoration.value_pointer) - - @property - def checkpoint_dependencies(self): - """Other `Checkpointable` objects on which this object depends.""" - return self._checkpoint_dependencies - - -def _breadth_first_checkpointable_traversal(root_checkpointable): - """Find shortest paths to all variables owned by dependencies of root.""" - bfs_sorted = [] - root_checkpointable_reference = _CheckpointableReference( - name=None, ref=root_checkpointable) - to_visit = collections.deque([root_checkpointable_reference]) - path_to_root = {root_checkpointable_reference: ()} - while to_visit: - current_checkpointable = to_visit.popleft() - bfs_sorted.append(current_checkpointable) - for child_checkpointable in ( - current_checkpointable.ref.checkpoint_dependencies): - if child_checkpointable not in path_to_root: - path_to_root[child_checkpointable] = ( - path_to_root[current_checkpointable] + (child_checkpointable,)) - to_visit.append(child_checkpointable) - return bfs_sorted, path_to_root - - -def _object_prefix_from_path(path_to_root): - return "/".join( - (checkpointable.name for checkpointable in path_to_root)) - - -def _escape_variable_name(variable_name): - # We need to support slashes in variable names for compatibility, since this - # naming scheme is being patched in to things like Layer.add_variable where - # slashes were previously accepted. We also want to use slashes to indicate - # edges traversed to reach the variable, so we escape forward slashes in - # variable names. - return variable_name.replace("_S_", "_S_.").replace(r"/", r"_S__") - - -def _variable_naming_for_object(path_to_root): - """Make a function for naming variables in an object.""" - # Name non-slot variables: - # - # / - # - # is not necessarily unique, but this is fine since we also - # save the graph of `Checkpointable`s with the checkpoint. Even if this path - # no longer exists because of a change in the Python program, we can look up - # the `Checkpointable` which owns the variable in the checkpoint's graph and - # use another path if one still exists. - - object_prefix = _object_prefix_from_path(path_to_root) - if object_prefix: - object_prefix += "/" - - def _name_single_variable(local_name): - """Names a variable within an object.""" - return object_prefix + _escape_variable_name(local_name) - - return _name_single_variable - - -def _slot_variable_naming_for_optimizer(optimizer, path_to_root): - """Make a function for naming slot variables in an optimizer.""" - # Name slot variables: - # - # /<_OPTIMIZER_SLOTS_NAME>// - # - # where is exactly the checkpoint name used for the original - # variable, including the path from the checkpoint root and the local name in - # the object which owns it. Note that we only save slot variables if the - # variable it's slotting for is also being saved. - - optimizer_identifier = "/%s/%s/" % (_OPTIMIZER_SLOTS_NAME, - _object_prefix_from_path(path_to_root)) - - def _name_slot_variable(variable_path, slot_name): - """With an optimizer specified, name a slot variable.""" - - if not _VALID_LOCAL_NAME.match(slot_name): - # Slot variable names include the name of the slot. We need to - # validate that part of the name to be sure that the checkpoint name - # is a valid name scope name. - raise ValueError( - ("Could not save slot variables for optimizer %s, because its " - "slot name has invalid characters (got '%s', was expecting it " - "to match the regular expression '%s').") % - (optimizer, slot_name, _VALID_LOCAL_NAME.pattern)) - - return variable_path + optimizer_identifier + slot_name - - return _name_slot_variable - - -def _serialize_non_slot_variables(checkpointable_objects, path_to_root, - object_graph_proto): - """Name non-slot variables and add them to `object_graph_proto`.""" - named_variables = {} - non_slot_variables = [] - checkpoint_node_ids = {} - - for checkpoint_id, checkpointable in enumerate(checkpointable_objects): - checkpoint_node_ids[checkpointable] = checkpoint_id - - for checkpoint_id, checkpointable in enumerate(checkpointable_objects): - naming_scheme = _variable_naming_for_object(path_to_root[checkpointable]) - object_proto = object_graph_proto.nodes.add() - for (local_name, owned_variable) in sorted( - checkpointable.ref._owned_variables.items(), # pylint: disable=protected-access - key=lambda x: x[0]): - variable_name = naming_scheme(local_name) - named_variables[variable_name] = owned_variable - non_slot_variables.append(( - variable_name, # The variable's full checkpoint name - owned_variable, # The variable object - local_name, # The variable's local name - checkpoint_id)) # The checkpoint ID of the node which owns this - # variable. - variable_proto = object_proto.variables.add() - variable_proto.local_name = local_name - variable_proto.checkpoint_key = variable_name - # Figure out the name-based Saver's name for this variable. - saver_dict = saver_lib.BaseSaverBuilder.OpListToDict( - [owned_variable], convert_variable_to_tensor=False) - variable_full_name, = saver_dict.keys() - variable_proto.full_name = variable_full_name - - for child in checkpointable.ref.checkpoint_dependencies: - child_proto = object_proto.children.add() - child_proto.node_id = checkpoint_node_ids[child] - child_proto.local_name = child.name - return named_variables, non_slot_variables - - -def _serialize_slot_variables(checkpointable_objects, path_to_root, - non_slot_variables, object_graph_proto): - """Name slot variables and add them to `object_graph_proto`.""" - named_slot_variables = {} - for optimizer_checkpoint_id, checkpointable_ref in enumerate( - checkpointable_objects): - if isinstance(checkpointable_ref.ref, optimizer_lib.Optimizer): - optimizer_object_proto = object_graph_proto.nodes[optimizer_checkpoint_id] - naming_scheme = _slot_variable_naming_for_optimizer( - optimizer=checkpointable_ref.ref, - path_to_root=path_to_root[checkpointable_ref]) - slot_names = checkpointable_ref.ref.get_slot_names() - for (variable_path, original_variable, original_variable_local_name, - original_node_checkpoint_id) in non_slot_variables: - for slot_name in slot_names: - slot_variable = checkpointable_ref.ref.get_slot( - original_variable, slot_name) - if slot_variable is not None: - checkpoint_name = naming_scheme( - variable_path=variable_path, slot_name=slot_name) - named_slot_variables[checkpoint_name] = slot_variable - slot_variable_proto = optimizer_object_proto.slot_variables.add() - slot_variable_proto.slot_name = slot_name - slot_variable_proto.checkpoint_key = checkpoint_name - # Figure out the name-based Saver's name for this variable. - saver_dict = saver_lib.BaseSaverBuilder.OpListToDict( - [slot_variable], convert_variable_to_tensor=False) - slot_variable_full_name, = saver_dict.keys() - slot_variable_proto.full_name = slot_variable_full_name - slot_variable_proto.original_variable_local_name = ( - original_variable_local_name) - slot_variable_proto.original_variable_node_id = ( - original_node_checkpoint_id) - return named_slot_variables - - -# TODO(allenl): Convenience utility for saving multiple objects (i.e. construct -# a root Checkpointable if passed a list of Checkpointables). -def _serialize_object_graph(root_checkpointable): - """Determine checkpoint keys for variables and build a serialized graph. - - Non-slot variables are keyed based on a shortest path from the root saveable - to the object which owns the variable (i.e. the one which called - `Checkpointable.add_variable` to create it). - - Slot variables are keyed based on a shortest path to the variable being - slotted for, a shortest path to their optimizer, and the slot name. - - Args: - root_checkpointable: A `Checkpointable` object whose variables (including - the variables of dependencies, recursively) should be saved. - - Returns: - A tuple of (named_variables, object_graph_proto): - named_variables: A dictionary mapping names to variable objects. - object_graph_proto: A CheckpointableObjectGraph protocol buffer containing - the serialized object graph and variable references. - - Raises: - ValueError: If there are invalid characters in an optimizer's slot names. - """ - checkpointable_objects, path_to_root = ( - _breadth_first_checkpointable_traversal(root_checkpointable)) - object_graph_proto = ( - checkpointable_object_graph_pb2.CheckpointableObjectGraph()) - - # Gather non-slot variables. - named_variables, non_slot_variables = _serialize_non_slot_variables( - checkpointable_objects, path_to_root, object_graph_proto) - - # Gather slot variables which are associated with variables gathered above. - named_slot_variables = _serialize_slot_variables( - checkpointable_objects, path_to_root, non_slot_variables, - object_graph_proto) - - named_variables.update(named_slot_variables) - return named_variables, object_graph_proto - - -def _set_reference(reference_proto_table, key, checkpointable, parent, - object_id_map): - """Record a checkpoint<->object correspondence, with error checking. - - Args: - reference_proto_table: Map from names or numbers to `ObjectReference` protos - within the parent object. - key: Either a numeric or string identifier for the reference. - checkpointable: The object to record a correspondence for. - parent: The parent Python object, for creating a useful error message. - object_id_map: The map from `node_id` to Python object in which to record - the reference. - Returns: - The `node_id` of the Object proto corresponding to the specified Python - object. - Raises: - AssertionError: If another object is already bound to the `Object` proto. - """ - reference_proto = reference_proto_table[key] - set_reference = object_id_map.setdefault(reference_proto.node_id, - checkpointable) - if set_reference is not checkpointable: - raise AssertionError( - ("Unable to load the checkpoint into this object graph. Either " - "the Checkpointable object references in the Python program " - "have changed in an incompatible way, or the checkpoint was " - "generated in an incompatible program.\n\nTwo checkpoint " - "references (one being '%s' in %s) resolved to different " - "objects (%s and %s).") % (key, parent, set_reference, - checkpointable)) - return reference_proto.node_id - - -def _checkpoint_object_id_map(root_checkpointable, object_graph_proto): - """Match a checkpointed object graph to a Python object graph. - - Args: - root_checkpointable: A Checkpointable object. - object_graph_proto: A CheckpointableObjectGraph protocol buffer representing - a serialized object graph. - Returns: - A dictionary mapping from checkpoint node ids (indices into - `object_graph_proto.nodes`) to `Checkpointable` objects which are - dependencies of `root_checkpointable`. - """ - node_list = object_graph_proto.nodes - # Queue of (checkpointable object, node id) - to_visit = collections.deque([(root_checkpointable, 0)]) - object_id_map = {0: root_checkpointable} - seen = set() - while to_visit: - checkpointable, node_id = to_visit.popleft() - object_proto = node_list[node_id] - named_children = {} - for child_reference in object_proto.children: - if child_reference.local_name: - named_children[child_reference.local_name] = child_reference - else: - raise AssertionError( - ("The checkpointed object graph contains a reference without " - "a name (corrupted?). The reference was from the node %s.") - % (object_proto,)) - - for checkpointable_reference in checkpointable._checkpoint_dependencies: # pylint: disable=protected-access - child_node_id = _set_reference( - reference_proto_table=named_children, - key=checkpointable_reference.name, - checkpointable=checkpointable_reference.ref, - parent=checkpointable, - object_id_map=object_id_map) - if child_node_id not in seen: - seen.add(child_node_id) - to_visit.append((checkpointable_reference.ref, child_node_id)) - - return object_id_map - - -_ValuePointer = collections.namedtuple( - "_ValuePointer", - [ - # Information needed to look up the value to restore. - "save_path", - "checkpoint_key", - "dtype", - # The session to use when restoring (None when executing eagerly) - "session", - ]) - -_SlotVariableRestoration = collections.namedtuple( - "_SlotVariableRestoration", - [ - # A weak reference to the Optimizer object - "optimizer_ref", - # The slot name - "slot_name", - # The _ValuePointer to use when restoring - "value_pointer", - ]) - -_VariableRestoration = collections.namedtuple( - "_VariableRestoration", - [ - # The variable's (local) name. - "name", - # _SlotVariableRestoration objects indicating slot variables which - # should be created once this variable has been restored. - "slot_restorations", - # The _ValuePointer to use when restoring - "value_pointer", - ]) - - -def _gather_restorations(object_graph_proto, save_path, object_id_map, - dtype_map, session): - """Iterate over variables to restore, matching with Checkpointable objects.""" - variable_to_slot_restorations = {} - for node_id, node in enumerate(object_graph_proto.nodes): - for slot_variable in node.slot_variables: - original_variable_key = (slot_variable.original_variable_node_id, - slot_variable.original_variable_local_name) - variable_to_slot_restorations.setdefault( - original_variable_key, []).append( - _SlotVariableRestoration( - optimizer_ref=weakref.ref(object_id_map[node_id]), - slot_name=slot_variable.slot_name, - value_pointer=_ValuePointer( - save_path=save_path, - checkpoint_key=slot_variable.checkpoint_key, - dtype=dtype_map[slot_variable.checkpoint_key], - session=session))) - - for node_id, node in enumerate(object_graph_proto.nodes): - for variable in node.variables: - slots_key = (node_id, variable.local_name) - variable_restore = _VariableRestoration( - name=variable.local_name, - slot_restorations=variable_to_slot_restorations.get(slots_key, []), - value_pointer=_ValuePointer( - save_path=save_path, - checkpoint_key=variable.checkpoint_key, - dtype=dtype_map[variable.checkpoint_key], - session=session)) - yield variable_restore, object_id_map[node_id] - - -def save(file_prefix, root_checkpointable, global_step=None, session=None): - """Save a training checkpoint. - - Args: - file_prefix: A prefix to use for the checkpoint filenames - (/path/to/directory/and_a_prefix). Names are generated based on this - prefix and the global step, if provided. - root_checkpointable: A Checkpointable object to save. The checkpoint - includes variables created by this object and any Checkpointable objects - it depends on. - global_step: An integer variable or Tensor, used to number - checkpoints. Typically this value is saved along with other variables in - training checkpoints, which will happen automatically if it was created by - `root_checkpointable` or one of its dependencies (via - `Checkpointable.add_variable`). - session: The session to evaluate variables in. Ignored when executing - eagerly. If not provided when graph building, the default session is used. - - Returns: - The full path to the checkpoint. - - Currently also returns the serialized object graph proto, but that will go - away once it's saved with the checkpoint. - """ - named_variables, serialized_graph = _serialize_object_graph( - root_checkpointable) - if context.in_graph_mode(): - if session is None: - session = ops.get_default_session() - else: - session = None - with ops.device("/device:CPU:0"): - save_path = saver_lib.Saver(var_list=named_variables).save( - sess=session, - save_path=file_prefix, - write_meta_graph=False, - global_step=global_step) - # TODO(allenl): Save the graph with the checkpoint, then returning it and - # taking it as an argument to restore won't be necessary. - return serialized_graph, save_path - - -# NOTE: Will be restore(file_prefix, root_checkpointable) once the object graph -# is saved with the checkpoint. -def restore(save_path, root_checkpointable, object_graph_proto, session=None): - """Restore a training checkpoint. - - Restores the values of variables created with `Checkpointable.add_variable` in - the dependency graph of `root_checkpointable`. Either assigns values - immediately (if variables to restore have been created already), or defers - restoration until the variables are created. - - When building a graph, restorations are executed in the default session if - `session` is `None`. Variable initializers read checkpointed values. - - Args: - save_path: The path to the checkpoint, as returned by `save` or - `tf.train.latest_checkpoint`. If None (as when there is no latest - checkpoint for `tf.train.latest_checkpoint` to return), does nothing. - root_checkpointable: The root of the object graph to restore. Variables to - restore need not have been created yet, but all dependencies on other - Checkpointable objects should already be declared. Objects in the - dependency graph are matched to objects in the checkpointed graph, and - matching objects have their variables restored (or the checkpointed values - saved for eventual restoration when the variable is created). - object_graph_proto: (Temporary) the checkpointed object graph. This will - eventually be saved with the checkpoint, and will not be part of the final - API. - session: The session to evaluate assignment ops in. Ignored when executing - eagerly. If not provided when graph building, the default session is used. - """ - if save_path is None: - return - object_id_map = _checkpoint_object_id_map(root_checkpointable, - object_graph_proto) - reader = training.NewCheckpointReader(save_path) - dtype_map = reader.get_variable_to_dtype_map() - if context.in_graph_mode(): - if session is None: - session = ops.get_default_session() - else: - session = None - for restoration, checkpointable in _gather_restorations( - object_graph_proto, save_path, object_id_map, dtype_map, session=session): - checkpointable._process_restoration(restoration) # pylint: disable=protected-access - diff --git a/tensorflow/contrib/eager/python/checkpointable_test.py b/tensorflow/contrib/eager/python/checkpointable_test.py deleted file mode 100644 index f7bc155dec..0000000000 --- a/tensorflow/contrib/eager/python/checkpointable_test.py +++ /dev/null @@ -1,497 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import os - -import six - -from tensorflow.contrib.eager.python import checkpointable -from tensorflow.contrib.eager.python import network as network_lib -from tensorflow.python.eager import context -from tensorflow.python.eager import test -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util -from tensorflow.python.layers import base -from tensorflow.python.layers import core -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.training import adam -from tensorflow.python.training import saver as core_saver -from tensorflow.python.training import training_util - - -class CheckpointableDenseLayer(core.Dense, checkpointable.Checkpointable): - - def __init__(self, *args, **kwargs): - checkpointable.Checkpointable.__init__(self) - core.Dense.__init__(self, *args, **kwargs) - - def add_variable(self, name, shape, **kwargs): - # Calls both Checkpointable.add_variable and Layer.add_variable. Eventually - # Layer.add_variable should inherit from Checkpointable and simply call - # super and then do post-processing. - return checkpointable.Checkpointable.add_variable( - self, - name=name, - shape=shape, - getter=functools.partial(core.Dense.add_variable, self), - **kwargs) - - -# pylint: disable=not-callable -class CheckpointableNetwork(network_lib.Network, checkpointable.Checkpointable): - - def __init__(self): - network_lib.Network.__init__(self) - checkpointable.Checkpointable.__init__(self) - - def __setattr__(self, name, value): - if isinstance(value, base.Layer) and value not in self._already_tracked: - self.track_layer(value, name=name) - # Checkpointable is next in the method resolution order, so this will catch - # Checkpointable objects which aren't Layers. - super(CheckpointableNetwork, self).__setattr__(name, value) - - def track_layer(self, layer, name): - self.track_checkpointable(layer, name=name) - return super(CheckpointableNetwork, self).track_layer(layer) - - -class CheckpointableAdam(adam.AdamOptimizer, checkpointable.Checkpointable): - - def __init__(self, *args, **kwargs): - checkpointable.Checkpointable.__init__(self) - adam.AdamOptimizer.__init__(self, *args, **kwargs) - - # NOTE: Copied from Optimizer with modifications to use add_variable - # for non-slot variables. These contortions are necessary to maintain - # checkpoint compatibility with variable.name based saving. - # TODO(allenl): Make this cleaner. - def _create_non_slot_variable(self, initial_value, name, colocate_with): - """Add an extra variable, not associated with a slot.""" - if context.in_graph_mode(): - graph = colocate_with.graph - else: - graph = None - - key = (name, graph) - v = self._non_slot_dict.get(key, None) - if v is None: - with ops.colocate_with(colocate_with): - def _variable_getter(name, shape, dtype, initializer): - del shape, dtype # not used, but there for compatibility - return variable_scope.variable( - name=name, initial_value=initializer, trainable=False) - - initial_value = ops.convert_to_tensor(initial_value) - v = self.add_variable( - name=name, - shape=initial_value.get_shape(), - initializer=initial_value, - getter=_variable_getter) - - self._non_slot_dict[key] = v - - return v - - -class NonLayerCheckpointable(checkpointable.Checkpointable): - - def __init__(self): - super(NonLayerCheckpointable, self).__init__() - self.a_variable = self.add_variable(name="a_variable", shape=[]) - - -class MyNetwork(CheckpointableNetwork): - """A concrete Network for testing.""" - - def __init__(self): - super(MyNetwork, self).__init__() - self._named_dense = CheckpointableDenseLayer(1, use_bias=True) - self._via_track_layer = self.track_layer( - CheckpointableDenseLayer(1, use_bias=False), name="via_track_layer") - # We can still track Checkpointables which aren't Layers. - self._non_layer = NonLayerCheckpointable() - - def call(self, values): - return self._via_track_layer(self._named_dense(values)) - - -class Root(checkpointable.Checkpointable): - """A stand-in for a Trainer class.""" - - def __init__(self, optimizer, network): - super(Root, self).__init__() - self._optimizer = optimizer - self._network = self.track_checkpointable(network, "network") - self._global_step = None - - @property - def global_step(self): - if self._global_step is None: - # Get the default create_global_step utility to actually call - # self.add_variable, by setting a custom creator. - def _owned_variable_as_creator( - next_creator, initial_value, **kwargs): - def _creator_as_getter(initializer, **kwargs): - return next_creator(initial_value=initializer, **kwargs) - return self.add_variable( - getter=_creator_as_getter, initializer=initial_value, shape=[], - **kwargs) - - with variable_scope.variable_creator_scope( - _owned_variable_as_creator): - self._global_step = training_util.create_global_step() - return self._global_step - - -class InterfaceTests(test.TestCase): - - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - def testAddVariable(self): - obj = NonLayerCheckpointable() - with self.assertRaisesRegexp(ValueError, "do not specify shape"): - obj.add_variable( - name="shape_specified_twice", shape=[], initializer=1) - constant_initializer = obj.add_variable( - name="constant_initializer", initializer=1) - with variable_scope.variable_scope("some_variable_scope"): - ones_initializer = obj.add_variable( - name="ones_initializer", - shape=[2], - initializer=init_ops.ones_initializer(dtype=dtypes.float32)) - bare_initializer = obj.add_variable( - name="bare_initializer", - shape=[2, 2], - dtype=dtypes.float64, - initializer=init_ops.zeros_initializer) - - # Even in graph mode, there are no naming conflicts between objects, only - # naming conflicts within an object. - other_duplicate = resource_variable_ops.ResourceVariable( - name="duplicate", initial_value=1.) - duplicate = obj.add_variable(name="duplicate", shape=[]) - with self.assertRaisesRegexp(ValueError, "'duplicate' already exists"): - obj.add_variable(name="duplicate", shape=[]) - - if context.in_graph_mode(): - self.evaluate(variables.global_variables_initializer()) - self.assertEqual("constant_initializer:0", constant_initializer.name) - self.assertEqual(1, self.evaluate(constant_initializer)) - self.assertEqual("some_variable_scope/ones_initializer:0", - ones_initializer.name) - self.assertAllEqual([1, 1], self.evaluate(ones_initializer)) - self.assertAllEqual([[0., 0.], - [0., 0.]], self.evaluate(bare_initializer)) - self.assertEqual("a_variable:0", obj.a_variable.name) - self.assertEqual("duplicate:0", other_duplicate.name) - if context.in_graph_mode(): - # The .name attribute may be globally influenced, but the checkpoint name - # won't be (tested below). - self.assertEqual("duplicate_1:0", duplicate.name) - else: - # When executing eagerly, there's no uniquification of variable names. The - # checkpoint name will be the same. - self.assertEqual("duplicate:0", duplicate.name) - named_variables, _ = checkpointable._serialize_object_graph(obj) - expected_checkpoint_names = ( - "a_variable", - "bare_initializer", - "constant_initializer", - "duplicate", - "ones_initializer", - ) - six.assertCountEqual( - self, expected_checkpoint_names, named_variables.keys()) - - def testInitNotCalled(self): - - class NoInit(checkpointable.Checkpointable): - - def __init__(self): - pass - - with self.assertRaisesRegexp(RuntimeError, "__init__"): - NoInit().add_variable("var", shape=[]) - - -class CheckpointingTests(test.TestCase): - - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - def testNamingWithOptimizer(self): - input_value = constant_op.constant([[3.]]) - network = MyNetwork() - # A nuisance Network using the same optimizer. Its slot variables should not - # go in the checkpoint, since it is never depended on. - other_network = MyNetwork() - optimizer = CheckpointableAdam(0.001) - root_checkpointable = Root(optimizer=optimizer, network=network) - if context.in_eager_mode(): - optimizer.minimize( - lambda: network(input_value), - global_step=root_checkpointable.global_step) - optimizer.minimize( - lambda: other_network(input_value), - global_step=root_checkpointable.global_step) - else: - train_op = optimizer.minimize( - network(input_value), global_step=root_checkpointable.global_step) - optimizer.minimize( - other_network(input_value), - global_step=root_checkpointable.global_step) - self.evaluate(variables.global_variables_initializer()) - self.evaluate(train_op) - named_variables, serialized_graph = checkpointable._serialize_object_graph( - root_checkpointable) - expected_checkpoint_names = ( - # Created in the root node, so no prefix. - "global_step", - # No name provided to track_checkpointable(), so the position is used - # instead (one-based). - "network/via_track_layer/kernel", - # track_checkpointable() with a name provided, so that's used - "network/_named_dense/kernel", - "network/_named_dense/bias", - # non-Layer dependency of the network - "network/_non_layer/a_variable", - # The optimizer creates two non-slot variables - "_optimizer/beta1_power", - "_optimizer/beta2_power", - # Slot variables - "network/via_track_layer/kernel/-OPTIMIZER_SLOT/_optimizer/m", - "network/via_track_layer/kernel/-OPTIMIZER_SLOT/_optimizer/v", - "network/_named_dense/kernel/-OPTIMIZER_SLOT/_optimizer/m", - "network/_named_dense/kernel/-OPTIMIZER_SLOT/_optimizer/v", - "network/_named_dense/bias/-OPTIMIZER_SLOT/_optimizer/m", - "network/_named_dense/bias/-OPTIMIZER_SLOT/_optimizer/v", - ) - six.assertCountEqual(self, expected_checkpoint_names, - named_variables.keys()) - # Check that we've mapped to the right variable objects (not exhaustive) - self.assertEqual("global_step:0", named_variables["global_step"].name) - self.assertEqual("my_network/checkpointable_dense_layer_1/kernel:0", - named_variables["network/via_track_layer/kernel"].name) - self.assertEqual("my_network/checkpointable_dense_layer/kernel:0", - named_variables["network/_named_dense/kernel"].name) - self.assertEqual("beta1_power:0", - named_variables["_optimizer/beta1_power"].name) - self.assertEqual("beta2_power:0", - named_variables["_optimizer/beta2_power"].name) - # Spot check the generated protocol buffers. - self.assertEqual("_optimizer", - serialized_graph.nodes[0].children[0].local_name) - optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[ - 0].node_id] - self.assertEqual("beta1_power", optimizer_node.variables[0].local_name) - self.assertEqual("beta1_power", optimizer_node.variables[0].full_name) - # Variable ordering is arbitrary but deterministic (alphabetized) - self.assertEqual( - "bias", optimizer_node.slot_variables[0].original_variable_local_name) - original_variable_owner = serialized_graph.nodes[ - optimizer_node.slot_variables[0].original_variable_node_id] - self.assertEqual("network/_named_dense/bias", - original_variable_owner.variables[0].checkpoint_key) - self.assertEqual("bias", original_variable_owner.variables[0].local_name) - self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) - self.assertEqual("network/_named_dense/bias/-OPTIMIZER_SLOT/_optimizer/m", - optimizer_node.slot_variables[0].checkpoint_key) - # We strip off the :0 suffix, as variable.name-based saving does. - self.assertEqual("my_network/checkpointable_dense_layer/bias/Adam", - optimizer_node.slot_variables[0].full_name) - self.assertEqual("my_network/checkpointable_dense_layer/bias/Adam:0", - optimizer.get_slot( - var=named_variables["network/_named_dense/bias"], - name="m").name) - - @test_util.run_in_graph_and_eager_modes() - def testSaveRestore(self): - network = MyNetwork() - optimizer = CheckpointableAdam(0.001) - root_checkpointable = Root(optimizer=optimizer, network=network) - input_value = constant_op.constant([[3.]]) - if context.in_eager_mode(): - optimizer.minimize( - lambda: network(input_value), - global_step=root_checkpointable.global_step) - else: - train_op = optimizer.minimize( - network(input_value), global_step=root_checkpointable.global_step) - self.evaluate(variables.global_variables_initializer()) - self.evaluate(train_op) - prefix = os.path.join(self.get_temp_dir(), "ckpt") - self.evaluate(state_ops.assign(network._named_dense.variables[1], [42.])) - m_bias_slot = optimizer.get_slot(network._named_dense.variables[1], "m") - self.evaluate(state_ops.assign(m_bias_slot, [1.5])) - serialized_graph, save_path = checkpointable.save( - file_prefix=prefix, - root_checkpointable=root_checkpointable, - global_step=root_checkpointable.global_step) - self.evaluate(state_ops.assign(network._named_dense.variables[1], [43.])) - self.evaluate(state_ops.assign(root_checkpointable.global_step, 3)) - optimizer_variables = self.evaluate(optimizer.variables()) - self.evaluate(state_ops.assign(m_bias_slot, [-2.])) - # Immediate restoration - checkpointable.restore( - save_path=save_path, - root_checkpointable=root_checkpointable, - object_graph_proto=serialized_graph) - self.assertAllEqual([42.], self.evaluate(network._named_dense.variables[1])) - self.assertAllEqual(1, self.evaluate(root_checkpointable.global_step)) - self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) - with ops.Graph().as_default(): - on_create_network = MyNetwork() - on_create_optimizer = CheckpointableAdam(0.001) - on_create_root = Root( - optimizer=on_create_optimizer, network=on_create_network) - with self.test_session(graph=ops.get_default_graph()): - # Deferred restoration - checkpointable.restore( - save_path=save_path, - root_checkpointable=on_create_root, - object_graph_proto=serialized_graph) - on_create_network(constant_op.constant([[3.]])) # create variables - self.assertAllEqual(1, self.evaluate(on_create_root.global_step)) - self.assertAllEqual([42.], - self.evaluate( - on_create_network._named_dense.variables[1])) - on_create_m_bias_slot = on_create_optimizer.get_slot( - on_create_network._named_dense.variables[1], "m") - # Optimizer slot variables are created when the original variable is - # restored. - self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) - # beta1_power and beta2_power haven't been created yet, but everything - # else matches. - self.assertAllEqual(optimizer_variables[2:], - self.evaluate(on_create_optimizer.variables())) - on_create_optimizer._create_slots( - [resource_variable_ops.ResourceVariable([1.])]) - beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators() - self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power)) - self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power)) - - def testDeferredRestorationUsageEager(self): - """An idiomatic eager execution example.""" - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - latest_object_graph = None # Will be saved with the checkpoint eventually. - for training_continuation in range(3): - with ops.Graph().as_default(): - network = MyNetwork() - optimizer = CheckpointableAdam(0.001) - root = Root(optimizer=optimizer, network=network) - checkpointable.restore( - save_path=core_saver.latest_checkpoint(checkpoint_directory), - root_checkpointable=root, - object_graph_proto=latest_object_graph) - for _ in range(num_training_steps): - # TODO(allenl): Use a Dataset and serialize/checkpoint it. - input_value = constant_op.constant([[3.]]) - optimizer.minimize( - lambda: network(input_value), # pylint: disable=cell-var-from-loop - global_step=root.global_step) - latest_object_graph, _ = checkpointable.save( - file_prefix=checkpoint_prefix, - root_checkpointable=root) - self.assertEqual((training_continuation + 1) * num_training_steps, - root.global_step.numpy()) - - def testUsageGraph(self): - """Expected usage when graph building.""" - with context.graph_mode(): - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - latest_object_graph = None - for training_continuation in range(3): - with ops.Graph().as_default(): - network = MyNetwork() - optimizer = CheckpointableAdam(0.001) - root = Root(optimizer=optimizer, network=network) - input_value = constant_op.constant([[3.]]) - train_op = optimizer.minimize( - network(input_value), - global_step=root.global_step) - init_op = variables.global_variables_initializer() - checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) - with self.test_session(graph=ops.get_default_graph()) as session: - if checkpoint_path is None: - self.assertEqual(0, training_continuation) - session.run(init_op) - # Another alternative would be to run initializers automatically - # if no checkpoint is being loaded. This would make deferred - # loading a bit more useful with graph execution. - else: - checkpointable.restore( - save_path=checkpoint_path, - root_checkpointable=root, - object_graph_proto=latest_object_graph, - session=session) - for _ in range(num_training_steps): - session.run(train_op) - latest_object_graph, _ = checkpointable.save( - file_prefix=checkpoint_prefix, - root_checkpointable=root, - session=session) - self.assertEqual((training_continuation + 1) * num_training_steps, - session.run(root.global_step)) - - def _get_checkpoint_name(self, name): - root = checkpointable.Checkpointable() - root.add_variable(name=name, shape=[1, 2], dtype=dtypes.float64) - named_variables, _ = checkpointable._serialize_object_graph(root) - checkpoint_name, = named_variables.keys() - with ops.name_scope("root/" + checkpoint_name): - pass # Make sure we can use this as an op name if we prefix it. - return checkpoint_name - - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - def testVariableNameEscaping(self): - self.assertEqual(r"a_S__b_S__c", self._get_checkpoint_name(r"a/b/c")) - self.assertEqual(r"b", self._get_checkpoint_name(r"b")) - self.assertEqual(r"c_S__", self._get_checkpoint_name(r"c/")) - self.assertEqual(r"d_S___S_._", self._get_checkpoint_name(r"d/_S__")) - - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - def testNumberedPath(self): - root = checkpointable.Checkpointable() - leaf = checkpointable.Checkpointable() - root.track_checkpointable(leaf, name="leaf") - leaf.add_variable(name="v", shape=[]) - named_variables, _ = checkpointable._serialize_object_graph(root) - variable_name, = named_variables.keys() - self.assertEqual(r"leaf/v", variable_name) - - @test_util.run_in_graph_and_eager_modes() - def testLocalNameValidation(self): - root = checkpointable.Checkpointable() - leaf = checkpointable.Checkpointable() - with self.assertRaisesRegexp(ValueError, "invalid name"): - # Leading dashes are reserved, which avoids conflicts with un-named edges - # in paths and the optimizer slots identifier. - root.track_checkpointable(leaf, name="-unnamed-12") - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py new file mode 100644 index 0000000000..d3c57bc606 --- /dev/null +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -0,0 +1,413 @@ +"""Utilities for working with Checkpointable objects.""" +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from tensorflow.contrib.eager.proto import checkpointable_object_graph_pb2 +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import io_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import checkpointable as core_checkpointable +from tensorflow.python.training import optimizer as optimizer_lib +from tensorflow.python.training import saver as saver_lib + + +_ESCAPE_CHAR = "." # For avoiding conflicts with user-specified names. + +# Keyword for identifying that the next bit of a checkpoint variable name is a +# slot name. Checkpoint names for slot variables look like: +# +# /<_OPTIMIZER_SLOTS_NAME>// +# +# Where is a full path from the checkpoint root to the +# variable being slotted for. +_OPTIMIZER_SLOTS_NAME = _ESCAPE_CHAR + "OPTIMIZER_SLOT" +# Keyword for separating the path to an object from the name of an +# attribute in checkpoint names. Used like: +# /<_OBJECT_ATTRIBUTES_NAME>/ +_OBJECT_ATTRIBUTES_NAME = _ESCAPE_CHAR + "ATTRIBUTES" +# Key where the object graph proto is saved in a TensorBundle +_OBJECT_GRAPH_PROTO_KEY = "_CHECKPOINTABLE_OBJECT_GRAPH" + + +# TODO(allenl): If this ends up in a public API, consider adding LINT.IfChange +# or consolidating the implementation with get_variable. +def _default_getter(name, shape, dtype, initializer=None, + partition_info=None, **kwargs): + """A pared-down version of get_variable which does not reuse variables.""" + dtype = dtypes.as_dtype(dtype) + shape_object = tensor_shape.as_shape(shape) + with ops.init_scope(): + if initializer is None: + initializer, initializing_from_value = ( + variable_scope._get_default_variable_store()._get_default_initializer( # pylint: disable=protected-access + name=name, shape=shape_object, dtype=dtype)) + else: + initializing_from_value = not callable(initializer) + # Same logic as get_variable + variable_dtype = dtype.base_dtype + if initializing_from_value: + if shape is not None: + raise ValueError("If initializer is a constant, do not specify shape.") + initial_value = initializer + else: + # Instantiate initializer if provided initializer is a type object. + if isinstance(initializer, type(init_ops.Initializer)): + initializer = initializer(dtype=dtype) + def initial_value(): + return initializer( + shape_object.as_list(), dtype=dtype, partition_info=partition_info) + return resource_variable_ops.ResourceVariable( + initial_value=initial_value, + name=name, + dtype=variable_dtype, + **kwargs + ) + + +def add_variable(checkpointable, name, shape=None, dtype=dtypes.float32, + initializer=None): + """Add a variable to a Checkpointable with no scope influence.""" + return checkpointable._add_variable_with_custom_getter( # pylint: disable=protected-access + name=name, shape=shape, dtype=dtype, + initializer=initializer, getter=_default_getter) + + +def _breadth_first_checkpointable_traversal(root_checkpointable): + """Find shortest paths to all variables owned by dependencies of root.""" + bfs_sorted = [] + to_visit = collections.deque([root_checkpointable]) + path_to_root = {root_checkpointable: ()} + while to_visit: + current_checkpointable = to_visit.popleft() + current_checkpointable._maybe_initialize_checkpointable() # pylint: disable=protected-access + bfs_sorted.append(current_checkpointable) + for child_checkpointable in ( + current_checkpointable._checkpoint_dependencies): # pylint: disable=protected-access + if child_checkpointable.ref not in path_to_root: + path_to_root[child_checkpointable.ref] = ( + path_to_root[current_checkpointable] + (child_checkpointable,)) + to_visit.append(child_checkpointable.ref) + return bfs_sorted, path_to_root + + +def _escape_local_name(name): + # We need to support slashes in local names for compatibility, since this + # naming scheme is being patched in to things like Layer.add_variable where + # slashes were previously accepted. We also want to use slashes to indicate + # edges traversed to reach the variable, so we escape forward slashes in + # names. + return (name.replace(_ESCAPE_CHAR, _ESCAPE_CHAR + _ESCAPE_CHAR) + .replace(r"/", _ESCAPE_CHAR + "S")) + + +def _object_prefix_from_path(path_to_root): + return "/".join( + (_escape_local_name(checkpointable.name) + for checkpointable in path_to_root)) + + +def _slot_variable_naming_for_optimizer(optimizer_path): + """Make a function for naming slot variables in an optimizer.""" + # Name slot variables: + # + # /<_OPTIMIZER_SLOTS_NAME>// + # + # where is exactly the checkpoint name used for the original + # variable, including the path from the checkpoint root and the local name in + # the object which owns it. Note that we only save slot variables if the + # variable it's slotting for is also being saved. + + optimizer_identifier = "/%s/%s/" % (_OPTIMIZER_SLOTS_NAME, optimizer_path) + + def _name_slot_variable(variable_path, slot_name): + """With an optimizer specified, name a slot variable.""" + return (variable_path + + optimizer_identifier + + _escape_local_name(slot_name)) + + return _name_slot_variable + + +def _serialize_slot_variables(checkpointable_objects, node_ids, object_names): + """Gather and name slot variables.""" + non_slot_objects = list(checkpointable_objects) + slot_variables = {} + for checkpointable in non_slot_objects: + if isinstance(checkpointable, optimizer_lib.Optimizer): + naming_scheme = _slot_variable_naming_for_optimizer( + optimizer_path=object_names[checkpointable]) + slot_names = checkpointable.get_slot_names() + for slot_name in slot_names: + for original_variable_node_id, original_variable in enumerate( + non_slot_objects): + try: + slot_variable = checkpointable.get_slot( + original_variable, slot_name) + except AttributeError: + slot_variable = None + if slot_variable is None: + continue + slot_variable._maybe_initialize_checkpointable() # pylint: disable=protected-access + if slot_variable._checkpoint_dependencies: # pylint: disable=protected-access + # TODO(allenl): Gather dependencies of slot variables. + raise NotImplementedError( + "Currently only variables with no dependencies can be saved as " + "slot variables. File a feature request if this limitation " + "bothers you.") + if slot_variable in node_ids: + raise NotImplementedError( + "A slot variable was re-used as a dependency of a " + "Checkpointable object. This is not currently allowed. File a " + "feature request if this limitation bothers you.") + checkpoint_name = naming_scheme( + variable_path=object_names[original_variable], + slot_name=slot_name) + object_names[slot_variable] = checkpoint_name + slot_variable_node_id = len(checkpointable_objects) + node_ids[slot_variable] = slot_variable_node_id + checkpointable_objects.append(slot_variable) + slot_variable_proto = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph + .Object.SlotVariableReference( + slot_name=slot_name, + original_variable_node_id=original_variable_node_id, + slot_variable_node_id=slot_variable_node_id)) + slot_variables.setdefault(checkpointable, []).append( + slot_variable_proto) + return slot_variables + + +def _serialize_checkpointables( + checkpointable_objects, node_ids, object_names, slot_variables): + """Name non-slot `Checkpointable`s and add them to `object_graph_proto`.""" + object_graph_proto = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph()) + named_saveables = {} + + for checkpoint_id, checkpointable in enumerate(checkpointable_objects): + assert node_ids[checkpointable] == checkpoint_id + object_proto = object_graph_proto.nodes.add() + object_proto.slot_variables.extend(slot_variables.get(checkpointable, ())) + object_name = object_names[checkpointable] + for name, saveable in ( + checkpointable._gather_tensors_for_checkpoint().items()): # pylint: disable=protected-access + attribute = object_proto.attributes.add() + attribute.name = name + attribute.checkpoint_key = "%s/%s/%s" % ( + object_name, _OBJECT_ATTRIBUTES_NAME, _escape_local_name(name)) + # Figure out the name-based Saver's name for this variable. + saver_dict = saver_lib.BaseSaverBuilder.OpListToDict( + [saveable], convert_variable_to_tensor=False) + attribute.full_name, = saver_dict.keys() + named_saveables[attribute.checkpoint_key] = saveable + + for child in checkpointable._checkpoint_dependencies: # pylint: disable=protected-access + child_proto = object_proto.children.add() + child_proto.node_id = node_ids[child.ref] + child_proto.local_name = child.name + + return named_saveables, object_graph_proto + + +def _serialize_object_graph(root_checkpointable): + """Determine checkpoint keys for variables and build a serialized graph. + + Non-slot variables are keyed based on a shortest path from the root saveable + to the object which owns the variable (i.e. the one which called + `Checkpointable._add_variable` to create it). + + Slot variables are keyed based on a shortest path to the variable being + slotted for, a shortest path to their optimizer, and the slot name. + + Args: + root_checkpointable: A `Checkpointable` object whose variables (including + the variables of dependencies, recursively) should be saved. + + Returns: + A tuple of (named_variables, object_graph_proto): + named_variables: A dictionary mapping names to variable objects. + object_graph_proto: A CheckpointableObjectGraph protocol buffer containing + the serialized object graph and variable references. + + Raises: + ValueError: If there are invalid characters in an optimizer's slot names. + """ + checkpointable_objects, path_to_root = ( + _breadth_first_checkpointable_traversal(root_checkpointable)) + object_names = { + obj: _object_prefix_from_path(path) + for obj, path in path_to_root.items()} + node_ids = {node: node_id for node_id, node + in enumerate(checkpointable_objects)} + slot_variables = _serialize_slot_variables( + checkpointable_objects=checkpointable_objects, + node_ids=node_ids, + object_names=object_names) + return _serialize_checkpointables( + checkpointable_objects=checkpointable_objects, + node_ids=node_ids, + object_names=object_names, + slot_variables=slot_variables) + + +class _NoRestoreSaveable(saver_lib.BaseSaverBuilder.SaveableObject): + + def __init__(self, tensor, name): + spec = saver_lib.BaseSaverBuilder.SaveSpec(tensor, "", name) + super(_NoRestoreSaveable, self).__init__(tensor, [spec], name) + + def restore(self, restored_tensors, restored_shapes): + return control_flow_ops.no_op() + + +def save(file_prefix, root_checkpointable, checkpoint_number=None, + session=None): + """Save a training checkpoint. + + Args: + file_prefix: A prefix to use for the checkpoint filenames + (/path/to/directory/and_a_prefix). Names are generated based on this + prefix and the global step, if provided. + root_checkpointable: A Checkpointable object to save. The checkpoint + includes variables created by this object and any Checkpointable objects + it depends on. + checkpoint_number: An integer variable or Tensor, used to number + checkpoints. Typically this value is saved along with other variables in + training checkpoints, which will happen automatically if it was created by + `root_checkpointable` or one of its dependencies (via + `Checkpointable._add_variable`). + session: The session to evaluate variables in. Ignored when executing + eagerly. If not provided when graph building, the default session is used. + + Returns: + The full path to the checkpoint. + """ + named_variables, serialized_graph = _serialize_object_graph( + root_checkpointable) + if context.in_graph_mode(): + if session is None: + session = ops.get_default_session() + else: + session = None + assert _OBJECT_GRAPH_PROTO_KEY not in named_variables + # TODO(allenl): Feed rather than embedding a constant. + named_variables[_OBJECT_GRAPH_PROTO_KEY] = _NoRestoreSaveable( + tensor=constant_op.constant( + serialized_graph.SerializeToString(), dtype=dtypes.string), + name=_OBJECT_GRAPH_PROTO_KEY) + with ops.device("/device:CPU:0"): + save_path = saver_lib.Saver(var_list=named_variables).save( + sess=session, + save_path=file_prefix, + write_meta_graph=False, + global_step=checkpoint_number) + return save_path + + +class CheckpointLoadStatus(object): + + def __init__(self, checkpoint): + self._checkpoint = checkpoint + + def assert_consumed(self): + """Asserts that all objects in the checkpoint have been created/matched.""" + for node_id, node in enumerate(self._checkpoint.object_graph_proto.nodes): + checkpointable = self._checkpoint.object_by_proto_id.get(node_id, None) + if checkpointable is None: + raise AssertionError("Unresolved object in checkpoint: %s" % (node)) + if checkpointable._update_uid < self._checkpoint.restore_uid: # pylint: disable=protected-access + raise AssertionError( + "Object not assigned a value from checkpoint: %s" % (node)) + return self + + +def restore(save_path, root_checkpointable, session=None): + """Restore a training checkpoint. + + Restores the values of variables created with `Checkpointable._add_variable` + in `root_checkpointable` and any objects that it tracks (transitive). Either + assigns values immediately if variables to restore have been created already, + or defers restoration until the variables are created. Dependencies added to + `root_checkpointable` after this call will be matched if they have a + corresponding object in the checkpoint. + + When building a graph, restorations are executed in the default session if + `session` is `None`. Variable initializers read checkpointed values. + + To disallow deferred loading, assert immediately that all checkpointed + variables have been matched to variable objects: + + ```python + restore(path, root).assert_consumed() + ``` + + An exception will be raised unless every object was matched and its variables + already exist. + + Args: + save_path: The path to the checkpoint, as returned by `save` or + `tf.train.latest_checkpoint`. If None (as when there is no latest + checkpoint for `tf.train.latest_checkpoint` to return), does nothing. + root_checkpointable: The root of the object graph to restore. Variables to + restore need not have been created yet, but all dependencies on other + Checkpointable objects should already be declared. Objects in the + dependency graph are matched to objects in the checkpointed graph, and + matching objects have their variables restored (or the checkpointed values + saved for eventual restoration when the variable is created). + session: The session to evaluate assignment ops in. Ignored when executing + eagerly. If not provided when graph building, the default session is used. + Returns: + A CheckpointLoadStatus object, which can be used to make assertions about + the status of checkpoint restoration. + """ + if save_path is None: + return + if context.in_graph_mode(): + if session is None: + session = ops.get_default_session() + else: + session = None + object_graph_string, = io_ops.restore_v2( + prefix=save_path, + tensor_names=[_OBJECT_GRAPH_PROTO_KEY], + shape_and_slices=[""], + dtypes=[dtypes.string], + name="object_graph_proto_read") + if session is not None: + object_graph_string = session.run(object_graph_string) + else: + object_graph_string = object_graph_string.numpy() + object_graph_proto = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph()) + object_graph_proto.ParseFromString(object_graph_string) + checkpoint = core_checkpointable._Checkpoint( # pylint: disable=protected-access + object_graph_proto=object_graph_proto, + save_path=save_path, + session=session) + core_checkpointable._CheckpointPosition( # pylint: disable=protected-access + checkpoint=checkpoint, proto_id=0).restore(root_checkpointable) + return CheckpointLoadStatus(checkpoint) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py new file mode 100644 index 0000000000..1394f0cf0f --- /dev/null +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -0,0 +1,857 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import os +import unittest + +import six + +from tensorflow.contrib.eager.python import checkpointable_utils +from tensorflow.contrib.eager.python import network as network_lib +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.layers import base +from tensorflow.python.layers import core +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.training import adam +from tensorflow.python.training import checkpointable +from tensorflow.python.training import saver as core_saver +from tensorflow.python.training import training_util + + +class CheckpointableDenseLayer(core.Dense, checkpointable.Checkpointable): + + def __init__(self, *args, **kwargs): + checkpointable.Checkpointable.__init__(self) + core.Dense.__init__(self, *args, **kwargs) + + def add_variable(self, name, shape, **kwargs): + # Calls both Checkpointable._add_variable and Layer.add_variable. Eventually + # Layer.add_variable should inherit from Checkpointable and simply call + # super and then do post-processing. + return checkpointable.Checkpointable._add_variable_with_custom_getter( + self, + name=name, + shape=shape, + getter=functools.partial(core.Dense.add_variable, self), + **kwargs) + + +# pylint: disable=not-callable +class CheckpointableNetwork(network_lib.Network, checkpointable.Checkpointable): + + def __setattr__(self, name, value): + if isinstance(value, base.Layer): + self.track_layer(value, name=name) + # Checkpointable is next in the method resolution order, so this will catch + # Checkpointable objects which aren't Layers. + super(CheckpointableNetwork, self).__setattr__(name, value) + + def track_layer(self, layer, name): + self._track_checkpointable(layer, name=name) + return super(CheckpointableNetwork, self).track_layer(layer) + + +class CheckpointableAdam(adam.AdamOptimizer, checkpointable.Checkpointable): + + # NOTE: Copied from Optimizer with modifications to use add_variable + # for non-slot variables. These contortions are necessary to maintain + # checkpoint compatibility with variable.name based saving. + # TODO(allenl): Make this cleaner. + def _create_non_slot_variable(self, initial_value, name, colocate_with): + """Add an extra variable, not associated with a slot.""" + if context.in_graph_mode(): + graph = colocate_with.graph + else: + graph = None + + key = (name, graph) + v = self._non_slot_dict.get(key, None) + if v is None: + with ops.colocate_with(colocate_with): + def _variable_getter(name, shape, dtype, initializer): + del shape, dtype # not used, but there for compatibility + return variable_scope.variable( + name=name, initial_value=initializer, trainable=False) + + initial_value = ops.convert_to_tensor(initial_value) + v = self._add_variable_with_custom_getter( + name=name, + shape=initial_value.get_shape(), + initializer=initial_value, + getter=_variable_getter) + + self._non_slot_dict[key] = v + + return v + + +class NonLayerCheckpointable(checkpointable.Checkpointable): + + def __init__(self): + super(NonLayerCheckpointable, self).__init__() + self.a_variable = checkpointable_utils.add_variable( + self, name="a_variable", shape=[]) + + +class MyNetwork(CheckpointableNetwork): + """A concrete Network for testing.""" + + def __init__(self): + super(MyNetwork, self).__init__() + self._named_dense = CheckpointableDenseLayer(1, use_bias=True) + self._via_track_layer = self.track_layer( + CheckpointableDenseLayer(1, use_bias=False), name="via_track_layer") + # We can still track Checkpointables which aren't Layers. + self._non_layer = NonLayerCheckpointable() + + def call(self, values): + return self._via_track_layer(self._named_dense(values)) + + +class Checkpoint(checkpointable.Checkpointable): + """A utility class which groups `Checkpointable` objects.""" + + def __init__(self, **kwargs): + super(Checkpoint, self).__init__() + for k, v in sorted(kwargs.items(), key=lambda item: item[0]): + setattr(self, k, v) + self._save_counter = None + + @property + def save_counter(self): + """An integer variable which starts at zero and is incremented on save. + + Used to number checkpoints. + + Returns: + The save counter variable. + """ + if self._save_counter is None: + # Initialized to 0 and incremented before saving. + self._save_counter = checkpointable_utils.add_variable( + self, name="save_counter", initializer=0, dtype=dtypes.int64) + return self._save_counter + + def save(self, file_prefix, session=None): + assign_op = self.save_counter.assign_add(1) + if context.in_graph_mode(): + if session is None: + session = ops.get_default_session() + session.run(assign_op) + return checkpointable_utils.save( + file_prefix=file_prefix, + root_checkpointable=self, + checkpoint_number=self.save_counter, + session=session) + + def restore(self, save_path): + return checkpointable_utils.restore( + save_path=save_path, + root_checkpointable=self) + + +class InterfaceTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testAddVariable(self): + obj = NonLayerCheckpointable() + with self.assertRaisesRegexp(ValueError, "do not specify shape"): + checkpointable_utils.add_variable( + obj, name="shape_specified_twice", shape=[], initializer=1) + constant_initializer = checkpointable_utils.add_variable( + obj, name="constant_initializer", initializer=1) + with variable_scope.variable_scope("some_variable_scope"): + ones_initializer = checkpointable_utils.add_variable( + obj, + name="ones_initializer", + shape=[2], + initializer=init_ops.ones_initializer(dtype=dtypes.float32)) + bare_initializer = checkpointable_utils.add_variable( + obj, + name="bare_initializer", + shape=[2, 2], + dtype=dtypes.float64, + initializer=init_ops.zeros_initializer) + + # Even in graph mode, there are no naming conflicts between objects, only + # naming conflicts within an object. + other_duplicate = resource_variable_ops.ResourceVariable( + name="duplicate", initial_value=1.) + duplicate = checkpointable_utils.add_variable( + obj, name="duplicate", shape=[]) + with self.assertRaisesRegexp(ValueError, "'duplicate' already exists"): + checkpointable_utils.add_variable(obj, name="duplicate", shape=[]) + + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) + self.assertEqual("constant_initializer:0", constant_initializer.name) + self.assertEqual(1, self.evaluate(constant_initializer)) + self.assertEqual("some_variable_scope/ones_initializer:0", + ones_initializer.name) + self.assertAllEqual([1, 1], self.evaluate(ones_initializer)) + self.assertAllEqual([[0., 0.], + [0., 0.]], self.evaluate(bare_initializer)) + self.assertEqual("a_variable:0", obj.a_variable.name) + self.assertEqual("duplicate:0", other_duplicate.name) + if context.in_graph_mode(): + # The .name attribute may be globally influenced, but the checkpoint name + # won't be (tested below). + self.assertEqual("duplicate_1:0", duplicate.name) + else: + # When executing eagerly, there's no uniquification of variable names. The + # checkpoint name will be the same. + self.assertEqual("duplicate:0", duplicate.name) + named_variables, _ = checkpointable_utils._serialize_object_graph(obj) + expected_checkpoint_names = ( + "a_variable/.ATTRIBUTES/VARIABLE_VALUE", + "bare_initializer/.ATTRIBUTES/VARIABLE_VALUE", + "constant_initializer/.ATTRIBUTES/VARIABLE_VALUE", + "duplicate/.ATTRIBUTES/VARIABLE_VALUE", + "ones_initializer/.ATTRIBUTES/VARIABLE_VALUE", + ) + six.assertCountEqual( + self, expected_checkpoint_names, named_variables.keys()) + + def testInitNotCalled(self): + + class NoInit(checkpointable.Checkpointable): + + def __init__(self): + pass + + # __init__ for Checkpointable will be called implicitly. + checkpointable_utils.add_variable(NoInit(), "var", shape=[]) + + def testShapeDtype(self): + root = checkpointable.Checkpointable() + v1 = checkpointable_utils.add_variable( + root, name="v1", initializer=3., dtype=dtypes.float64) + self.assertEqual(dtypes.float64, v1.dtype) + v2 = checkpointable_utils.add_variable( + root, + name="v2", + shape=[3], + initializer=init_ops.ones_initializer, + dtype=dtypes.float64) + self.assertEqual(dtypes.float64, v2.dtype) + self.assertAllEqual([1., 1., 1.], self.evaluate(v2)) + + +class CheckpointingTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testNamingWithOptimizer(self): + input_value = constant_op.constant([[3.]]) + network = MyNetwork() + # A nuisance Network using the same optimizer. Its slot variables should not + # go in the checkpoint, since it is never depended on. + other_network = MyNetwork() + optimizer = CheckpointableAdam(0.001) + optimizer_step = training_util.get_or_create_global_step() + root_checkpointable = Checkpoint( + optimizer=optimizer, network=network, optimizer_step=optimizer_step) + if context.in_eager_mode(): + optimizer.minimize( + lambda: network(input_value), + global_step=optimizer_step) + optimizer.minimize( + lambda: other_network(input_value), + global_step=optimizer_step) + else: + train_op = optimizer.minimize( + network(input_value), global_step=optimizer_step) + optimizer.minimize( + other_network(input_value), + global_step=optimizer_step) + self.evaluate(variables.global_variables_initializer()) + self.evaluate(train_op) + named_variables, serialized_graph = ( + checkpointable_utils._serialize_object_graph(root_checkpointable)) + expected_checkpoint_names = ( + # Created in the root node, so no prefix. + "optimizer_step", + # No name provided to track_checkpointable(), so the position is used + # instead (one-based). + "network/via_track_layer/kernel", + # track_checkpointable() with a name provided, so that's used + "network/_named_dense/kernel", + "network/_named_dense/bias", + # non-Layer dependency of the network + "network/_non_layer/a_variable", + # The optimizer creates two non-slot variables + "optimizer/beta1_power", + "optimizer/beta2_power", + # Slot variables + "network/via_track_layer/kernel/.OPTIMIZER_SLOT/optimizer/m", + "network/via_track_layer/kernel/.OPTIMIZER_SLOT/optimizer/v", + "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", + "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", + "network/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", + "network/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", + ) + suffix = "/.ATTRIBUTES/VARIABLE_VALUE" + expected_checkpoint_names = [ + name + suffix for name in expected_checkpoint_names] + six.assertCountEqual(self, expected_checkpoint_names, + named_variables.keys()) + # Check that we've mapped to the right variable objects (not exhaustive) + self.assertEqual( + "global_step:0", + named_variables["optimizer_step" + suffix].name) + self.assertEqual( + "my_network/checkpointable_dense_layer_1/kernel:0", + named_variables["network/via_track_layer/kernel" + suffix].name) + self.assertEqual( + "my_network/checkpointable_dense_layer/kernel:0", + named_variables["network/_named_dense/kernel" + suffix].name) + self.assertEqual( + "beta1_power:0", + named_variables["optimizer/beta1_power" + suffix].name) + self.assertEqual( + "beta2_power:0", + named_variables["optimizer/beta2_power" + suffix].name) + # Spot check the generated protocol buffers. + self.assertEqual("optimizer", + serialized_graph.nodes[0].children[1].local_name) + optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[ + 1].node_id] + self.assertEqual("beta1_power", + optimizer_node.children[0].local_name) + self.assertEqual("beta1_power", + serialized_graph.nodes[optimizer_node.children[0].node_id] + .attributes[0].full_name) + self.assertEqual( + "my_network/checkpointable_dense_layer/kernel", + serialized_graph.nodes[optimizer_node.slot_variables[0] + .original_variable_node_id] + .attributes[0].full_name) + # We strip off the :0 suffix, as variable.name-based saving does. + self.assertEqual( + "my_network/checkpointable_dense_layer/kernel/Adam", + serialized_graph.nodes[optimizer_node.slot_variables[0] + .slot_variable_node_id] + .attributes[0].full_name) + self.assertEqual( + "my_network/checkpointable_dense_layer/kernel/Adam:0", + optimizer.get_slot( + var=named_variables["network/_named_dense/kernel" + suffix], + name="m").name) + self.assertEqual( + "network/_named_dense/kernel" + suffix, + serialized_graph.nodes[ + optimizer_node.slot_variables[0] + .original_variable_node_id].attributes[0].checkpoint_key) + self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) + self.assertEqual( + "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, + serialized_graph.nodes[ + optimizer_node.slot_variables[0] + .slot_variable_node_id].attributes[0].checkpoint_key) + + @test_util.run_in_graph_and_eager_modes() + def testSaveRestore(self): + network = MyNetwork() + optimizer = CheckpointableAdam(0.001) + root_checkpointable = Checkpoint(optimizer=optimizer, network=network) + input_value = constant_op.constant([[3.]]) + if context.in_eager_mode(): + optimizer.minimize( + lambda: network(input_value)) + else: + train_op = optimizer.minimize(network(input_value)) + # TODO(allenl): Make initialization more pleasant when graph building. + root_checkpointable.save_counter # pylint: disable=pointless-statement + self.evaluate(variables.global_variables_initializer()) + self.evaluate(train_op) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + self.evaluate(state_ops.assign(network._named_dense.variables[1], [42.])) + m_bias_slot = optimizer.get_slot(network._named_dense.variables[1], "m") + self.evaluate(state_ops.assign(m_bias_slot, [1.5])) + save_path = root_checkpointable.save(file_prefix=prefix) + self.evaluate(state_ops.assign(network._named_dense.variables[1], [43.])) + self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3)) + optimizer_variables = self.evaluate(optimizer.variables()) + self.evaluate(state_ops.assign(m_bias_slot, [-2.])) + # Immediate restoration + root_checkpointable.restore(save_path=save_path).assert_consumed() + self.assertAllEqual([42.], self.evaluate(network._named_dense.variables[1])) + self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter)) + self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) + with ops.Graph().as_default(): + on_create_network = MyNetwork() + on_create_optimizer = CheckpointableAdam(0.001) + on_create_root = Checkpoint( + optimizer=on_create_optimizer, network=on_create_network) + with self.test_session(graph=ops.get_default_graph()): + # Deferred restoration + status = on_create_root.restore(save_path=save_path) + on_create_network(constant_op.constant([[3.]])) # create variables + self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) + self.assertAllEqual([42.], + self.evaluate( + on_create_network._named_dense.variables[1])) + on_create_m_bias_slot = on_create_optimizer.get_slot( + on_create_network._named_dense.variables[1], "m") + # Optimizer slot variables are created when the original variable is + # restored. + self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) + self.assertAllEqual(optimizer_variables[2:], + self.evaluate(on_create_optimizer.variables())) + on_create_optimizer._create_slots( + [resource_variable_ops.ResourceVariable([1.])]) + status.assert_consumed() + beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators() + self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power)) + self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power)) + + def testDeferredRestorationUsageEager(self): + """An idiomatic eager execution example.""" + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + network = MyNetwork() + optimizer = CheckpointableAdam(0.001) + root = Checkpoint( + optimizer=optimizer, network=network, + optimizer_step=training_util.get_or_create_global_step()) + root.restore(core_saver.latest_checkpoint(checkpoint_directory)) + for _ in range(num_training_steps): + # TODO(allenl): Use a Dataset and serialize/checkpoint it. + input_value = constant_op.constant([[3.]]) + optimizer.minimize( + lambda: network(input_value), # pylint: disable=cell-var-from-loop + global_step=root.optimizer_step) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual((training_continuation + 1) * num_training_steps, + root.optimizer_step.numpy()) + + def testUsageGraph(self): + """Expected usage when graph building.""" + with context.graph_mode(): + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with ops.Graph().as_default(): + network = MyNetwork() + optimizer = CheckpointableAdam(0.001) + root = Checkpoint( + optimizer=optimizer, network=network, + global_step=training_util.get_or_create_global_step()) + input_value = constant_op.constant([[3.]]) + train_op = optimizer.minimize( + network(input_value), + global_step=root.global_step) + root.save_counter # pylint: disable=pointless-statement + init_op = variables.global_variables_initializer() + checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) + with self.test_session(graph=ops.get_default_graph()) as session: + if checkpoint_path is None: + self.assertEqual(0, training_continuation) + session.run(init_op) + # Another alternative would be to run initializers automatically + # if no checkpoint is being loaded. This would make deferred + # loading a bit more useful with graph execution. + else: + checkpointable_utils.restore( + save_path=checkpoint_path, + root_checkpointable=root, + session=session) + for _ in range(num_training_steps): + session.run(train_op) + root.save(file_prefix=checkpoint_prefix, + session=session) + self.assertEqual((training_continuation + 1) * num_training_steps, + session.run(root.global_step)) + self.assertEqual(training_continuation + 1, + session.run(root.save_counter)) + + def _get_checkpoint_name(self, name): + root = checkpointable.Checkpointable() + checkpointable_utils.add_variable( + root, name=name, shape=[1, 2], dtype=dtypes.float64) + named_variables, _ = checkpointable_utils._serialize_object_graph(root) + checkpoint_name, = named_variables.keys() + with ops.name_scope("root/" + checkpoint_name): + pass # Make sure we can use this as an op name if we prefix it. + return checkpoint_name + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testVariableNameEscaping(self): + suffix = "/.ATTRIBUTES/VARIABLE_VALUE" + self.assertEqual(r"a.Sb.Sc" + suffix, self._get_checkpoint_name(r"a/b/c")) + self.assertEqual(r"b" + suffix, self._get_checkpoint_name(r"b")) + self.assertEqual(r"c.S" + suffix, self._get_checkpoint_name(r"c/")) + self.assertEqual(r"d.S..S" + suffix, self._get_checkpoint_name(r"d/.S")) + self.assertEqual(r"d.S..ATTRIBUTES.Sf" + suffix, + self._get_checkpoint_name(r"d/.ATTRIBUTES/f")) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testNumberedPath(self): + root = checkpointable.Checkpointable() + leaf = checkpointable.Checkpointable() + root.leaf = leaf + checkpointable_utils.add_variable(leaf, name="v", shape=[]) + named_variables, _ = checkpointable_utils._serialize_object_graph(root) + variable_name, = named_variables.keys() + self.assertEqual(r"leaf/v/.ATTRIBUTES/VARIABLE_VALUE", variable_name) + + @test_util.run_in_graph_and_eager_modes() + def testLocalNameValidation(self): + root = checkpointable.Checkpointable() + leaf = checkpointable.Checkpointable() + # Dots are escaped, which avoids conflicts with reserved names. + root._track_checkpointable(leaf, name=".ATTRIBUTES") + checkpointable_utils.add_variable(checkpointable=leaf, name="a", shape=[]) + named_variables, _ = checkpointable_utils._serialize_object_graph(root) + name, = named_variables.keys() + self.assertEqual(name, "..ATTRIBUTES/a/.ATTRIBUTES/VARIABLE_VALUE") + + @test_util.run_in_graph_and_eager_modes() + def testLateDependencyTracking(self): + + class Dependency(checkpointable.Checkpointable): + + def build(self): + self.var = checkpointable_utils.add_variable( + self, "var", initializer=0.) + + class LateDependencies(checkpointable.Checkpointable): + + def add_dep(self): + self.dep = Dependency() + self.dep.build() + + original = LateDependencies() + original.add_dep() + self.evaluate(state_ops.assign(original.dep.var, 123.)) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = checkpointable_utils.save(checkpoint_prefix, original) + load_into = LateDependencies() + status = checkpointable_utils.restore(save_path, load_into) + with self.assertRaises(AssertionError): + status.assert_consumed() + load_into.add_dep() + status.assert_consumed() + self.assertEqual(123., self.evaluate(load_into.dep.var)) + + @test_util.run_in_graph_and_eager_modes() + def testDepAfterVar(self): + + class Dependency(checkpointable.Checkpointable): + + def build(self): + self.var = checkpointable_utils.add_variable( + self, "var", initializer=0.) + + class DepAfterVar(checkpointable.Checkpointable): + + def add_dep(self): + dep = Dependency() + dep.build() + self.dep = dep + + dep_after_var = DepAfterVar() + dep_after_var.add_dep() + self.evaluate(state_ops.assign(dep_after_var.dep.var, -14.)) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = checkpointable_utils.save( + checkpoint_prefix, dep_after_var) + + loaded_dep_after_var = DepAfterVar() + status = checkpointable_utils.restore( + save_path, loaded_dep_after_var) + loaded_dep_after_var.add_dep() + status.assert_consumed() + self.assertEqual(-14., self.evaluate(loaded_dep_after_var.dep.var)) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testDeferredSlotRestoration(self): + checkpoint_directory = self.get_temp_dir() + + root = checkpointable.Checkpointable() + root.var = checkpointable_utils.add_variable( + root, name="var", initializer=0.) + optimizer = CheckpointableAdam(0.1) + if context.in_graph_mode(): + train_op = optimizer.minimize(root.var) + self.evaluate(variables.global_variables_initializer()) + self.evaluate(train_op) + else: + optimizer.minimize(root.var.read_value) + self.evaluate(state_ops.assign(root.var, 12.)) + no_slots_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "no_slots"), root) + root.optimizer = optimizer + self.evaluate(state_ops.assign(root.var, 13.)) + self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var), + 14.)) + slots_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "with_slots"), root) + new_root = checkpointable.Checkpointable() + # Load the slot-containing checkpoint (deferred), then immediately overwrite + # the non-slot variable (also deferred). + slot_status = checkpointable_utils.restore( + slots_path, new_root) + no_slot_status = checkpointable_utils.restore( + no_slots_path, new_root) + with self.assertRaises(AssertionError): + no_slot_status.assert_consumed() + new_root.var = checkpointable_utils.add_variable( + new_root, name="var", shape=[]) + self.assertEqual(12., self.evaluate(new_root.var)) + no_slot_status.assert_consumed() + new_root.optimizer = CheckpointableAdam(0.1) + with self.assertRaisesRegexp(AssertionError, "beta1_power"): + slot_status.assert_consumed() + self.assertEqual(12., self.evaluate(new_root.var)) + self.assertEqual(14., self.evaluate( + new_root.optimizer.get_slot(name="m", var=new_root.var))) + if context.in_graph_mode(): + train_op = new_root.optimizer.minimize(new_root.var) + self.evaluate(train_op) + else: + new_root.optimizer.minimize(new_root.var.read_value) + slot_status.assert_consumed() + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testOverlappingRestores(self): + checkpoint_directory = self.get_temp_dir() + save_root = checkpointable.Checkpointable() + save_root.dep = checkpointable.Checkpointable() + save_root.dep.var = checkpointable_utils.add_variable( + save_root.dep, name="var", initializer=0.) + self.evaluate(state_ops.assign(save_root.dep.var, 12.)) + first_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "first"), save_root) + self.evaluate(state_ops.assign(save_root.dep.var, 13.)) + second_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "second"), save_root) + + first_root = checkpointable.Checkpointable() + second_root = checkpointable.Checkpointable() + first_status = checkpointable_utils.restore( + first_path, first_root) + second_status = checkpointable_utils.restore( + second_path, second_root) + load_dep = checkpointable.Checkpointable() + load_dep.var = checkpointable_utils.add_variable( + load_dep, name="var", shape=[]) + first_root.dep = load_dep + first_status.assert_consumed() + self.assertEqual(12., self.evaluate(load_dep.var)) + second_root.dep = load_dep + second_status.assert_consumed() + self.assertEqual(13., self.evaluate(load_dep.var)) + + # Try again with the order of the restore() reversed. The last restore + # determines the final value. + first_root = checkpointable.Checkpointable() + second_root = checkpointable.Checkpointable() + second_status = checkpointable_utils.restore( + second_path, second_root) + first_status = checkpointable_utils.restore( + first_path, first_root) + load_dep = checkpointable.Checkpointable() + load_dep.var = checkpointable_utils.add_variable( + load_dep, name="var", shape=[]) + first_root.dep = load_dep + first_status.assert_consumed() + self.assertEqual(12., self.evaluate(load_dep.var)) + second_root.dep = load_dep + second_status.assert_consumed() + self.assertEqual(12., self.evaluate(load_dep.var)) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testAmbiguousLoad(self): + # Not OK to split one checkpoint object into two + checkpoint_directory = self.get_temp_dir() + save_root = checkpointable.Checkpointable() + save_root.dep_one = checkpointable.Checkpointable() + save_root.dep_two = checkpointable.Checkpointable() + dep_three = checkpointable.Checkpointable() + save_root.dep_one.dep_three = dep_three + save_root.dep_two.dep_three = dep_three + checkpointable_utils.add_variable(dep_three, name="var", initializer=0.) + self.evaluate(variables.global_variables_initializer()) + save_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "ckpt"), save_root) + load_root = checkpointable.Checkpointable() + checkpointable_utils.restore(save_path, load_root) + load_root.dep_one = checkpointable.Checkpointable() + load_root.dep_two = checkpointable.Checkpointable() + load_root.dep_one.dep_three = checkpointable.Checkpointable() + with self.assertRaisesRegexp(AssertionError, + "resolved to different objects"): + load_root.dep_two.dep_three = checkpointable.Checkpointable() + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testObjectsCombined(self): + # Currently fine to load two checkpoint objects into one Python object + checkpoint_directory = self.get_temp_dir() + save_root = checkpointable.Checkpointable() + save_root.dep_one = checkpointable.Checkpointable() + save_root.dep_two = checkpointable.Checkpointable() + checkpointable_utils.add_variable( + save_root.dep_one, name="var1", initializer=32., dtype=dtypes.float64) + checkpointable_utils.add_variable( + save_root.dep_two, name="var2", initializer=64., dtype=dtypes.float64) + self.evaluate(variables.global_variables_initializer()) + save_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "ckpt"), save_root) + load_root = checkpointable.Checkpointable() + load_root.dep_one = checkpointable.Checkpointable() + load_root.dep_two = load_root.dep_one + v1 = checkpointable_utils.add_variable( + load_root.dep_one, name="var1", shape=[], dtype=dtypes.float64) + v2 = checkpointable_utils.add_variable( + load_root.dep_one, name="var2", shape=[], dtype=dtypes.float64) + checkpointable_utils.restore(save_path, load_root).assert_consumed() + self.assertEqual(32., self.evaluate(v1)) + self.assertEqual(64., self.evaluate(v2)) + + @test_util.run_in_graph_and_eager_modes() + def testDependencyLoop(self): + # Note: this test creates garbage during eager execution because it + # purposefully creates a reference cycle. + first = checkpointable.Checkpointable() + second = checkpointable.Checkpointable() + first.second = second + second.first = first + first.v = checkpointable_utils.add_variable( + first, "v1", initializer=[3., 1., 4.]) + second.v = checkpointable_utils.add_variable( + second, "v2", initializer=[1., 1., 2., 3.]) + self.evaluate(variables.global_variables_initializer()) + checkpoint_directory = self.get_temp_dir() + save_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "ckpt"), first) + + # Test deferred loading + first_load = checkpointable.Checkpointable() + status = checkpointable_utils.restore(save_path, first_load) + second_load = checkpointable.Checkpointable() + first_load.second = second_load + second_load.first = first_load + with self.assertRaises(AssertionError): + status.assert_consumed() + first_load.v = checkpointable_utils.add_variable( + first_load, "v1", shape=[3]) + second_load.v = checkpointable_utils.add_variable( + second_load, "v2", shape=[4]) + status.assert_consumed() + self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v)) + self.assertAllEqual([1., 1., 2., 3.], self.evaluate(second_load.v)) + + # Test loading when variables have already been created + self.evaluate(first_load.v.assign([2., 7., 1.])) + self.assertAllEqual([2., 7., 1.], self.evaluate(first_load.v)) + self.evaluate(second_load.v.assign([2., 7., 1., 8.])) + self.assertAllEqual([2., 7., 1., 8.], self.evaluate(second_load.v)) + checkpointable_utils.restore( + save_path, first_load).assert_consumed() + self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v)) + self.assertAllEqual([1., 1., 2., 3.], self.evaluate(second_load.v)) + + @test_util.run_in_graph_and_eager_modes() + def testRestoreOnAssign(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_graph = ops.Graph() + with save_graph.as_default(), self.test_session(save_graph): + first = checkpointable.Checkpointable() + first.var1 = variable_scope.get_variable( + name="outside_var", initializer=0.) + first.var2 = variable_scope.get_variable( + name="blah", initializer=0.) + self.evaluate(first.var1.assign(4.)) + self.evaluate(first.var2.assign(8.)) + save_path = checkpointable_utils.save( + checkpoint_prefix, root_checkpointable=first) + restore_graph = ops.Graph() + with restore_graph.as_default(), self.test_session(restore_graph): + second = checkpointable.Checkpointable() + second.var2 = variable_scope.get_variable( + name="blah", initializer=0.) + checkpointable_utils.restore(save_path, root_checkpointable=second) + recreated_var1 = variable_scope.get_variable( + name="outside_var", initializer=0.) + self.assertEqual(8., self.evaluate(second.var2)) + self.evaluate(recreated_var1.assign(-2.)) + self.assertEqual(-2., self.evaluate(recreated_var1)) + second.var1 = recreated_var1 + self.assertEqual(4., self.evaluate(recreated_var1)) + + # TODO(allenl): Saver class that doesn't pollute the graph with constants. + @unittest.skip("todo") + def testManySavesGraph(self): + """Saves after the first should not modify the graph.""" + with context.graph_mode(): + graph = ops.Graph() + with graph.as_default(), self.test_session(graph): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + obj = checkpointable.Checkpointable() + obj.var = variable_scope.get_variable(name="v", initializer=0.) + obj.opt = CheckpointableAdam(0.1) + obj.opt.minimize(obj.var.read_value()) + self.evaluate(variables.global_variables_initializer()) + checkpointable_utils.save( + checkpoint_prefix, root_checkpointable=obj) + before_ops = graph.get_operations() + checkpointable_utils.save( + checkpoint_prefix, root_checkpointable=obj) + self.assertEqual(before_ops, graph.get_operations()) + + @unittest.skip("todo") + def testManyRestoresGraph(self): + """Restores after the first should not modify the graph.""" + with context.graph_mode(): + graph = ops.Graph() + with graph.as_default(), self.test_session(graph): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + obj = checkpointable.Checkpointable() + obj.var = variable_scope.get_variable(name="v", initializer=0.) + obj.opt = CheckpointableAdam(0.1) + obj.opt.minimize(obj.var.read_value()) + self.evaluate(variables.global_variables_initializer()) + save_path = checkpointable_utils.save( + checkpoint_prefix, root_checkpointable=obj) + checkpointable_utils.restore( + save_path, root_checkpointable=obj) + before_ops = graph.get_operations() + checkpointable_utils.restore( + save_path, root_checkpointable=obj) + self.assertEqual(before_ops, graph.get_operations()) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index f563d32388..cee7c47e00 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2518,6 +2518,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":array_ops", + ":checkpointable", ":control_flow_ops", ":dtypes", ":framework_ops", @@ -2851,6 +2852,30 @@ py_library( ], ) +py_library( + name = "checkpointable", + srcs = ["training/checkpointable.py"], + srcs_version = "PY2AND3", + deps = [ + ":dtypes", + ":io_ops_gen", + ":ops", + ":pywrap_tensorflow", + ":util", + "//tensorflow/python/eager:context", + ], +) + +py_test( + name = "checkpointable_test", + srcs = ["training/checkpointable_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":checkpointable", + ":client_testlib", + ], +) + py_test( name = "evaluation_test", size = "small", diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 25cf5aca83..09d349fc2d 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -35,6 +35,7 @@ from tensorflow.python.ops import variables # pylint: disable=wildcard-import from tensorflow.python.ops.gen_resource_variable_ops import * # pylint: enable=wildcard-import +from tensorflow.python.training import checkpointable from tensorflow.python.util import compat @@ -348,6 +349,11 @@ class ResourceVariable(variables.Variable): if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") + if isinstance(initial_value, checkpointable.CheckpointInitialValue): + self._maybe_initialize_checkpointable() + self._update_uid = initial_value.checkpoint_position.restore_uid + initial_value = initial_value.wrapped_value + self._trainable = trainable if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 19e3298e40..125922e296 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -29,6 +29,7 @@ from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util import compat from tensorflow.python.util import tf_should_use from tensorflow.python.util.deprecation import deprecated @@ -36,7 +37,7 @@ from tensorflow.python.util.tf_export import tf_export @tf_export("Variable") -class Variable(object): +class Variable(checkpointable.Checkpointable): """See the @{$variables$Variables How To} for a high level overview. A variable maintains state in the graph across calls to `run()`. You add a @@ -306,6 +307,11 @@ class Variable(object): if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") + if isinstance(initial_value, checkpointable.CheckpointInitialValue): + self._maybe_initialize_checkpointable() + self._update_uid = initial_value.checkpoint_position.restore_uid + initial_value = initial_value.wrapped_value + if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] with ops.init_scope(): @@ -786,6 +792,20 @@ class Variable(object): setattr(Variable, operator, _run_op) + def _scatter_tensors_from_checkpoint(self, attributes): + """For implementing `Checkpointable`. Return an assignment op to run.""" + if (len(attributes) != 1 + or checkpointable.VARIABLE_VALUE_KEY not in attributes): + raise ValueError( + ("The variable %s was restored with unexpected values (expected one " + "with key %s, got %s)") % ( + self, checkpointable.VARIABLE_VALUE_KEY, attributes)) + return self.assign(attributes[checkpointable.VARIABLE_VALUE_KEY]) + + def _gather_tensors_for_checkpoint(self): + """For implementing `Checkpointable`. This object is saveable on its own.""" + return {checkpointable.VARIABLE_VALUE_KEY: self} + def _try_guard_against_uninitialized_dependencies(self, initial_value): """Attempt to guard against dependencies on uninitialized variables. diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py new file mode 100644 index 0000000000..c2fea0f40d --- /dev/null +++ b/tensorflow/python/training/checkpointable.py @@ -0,0 +1,584 @@ +"""An object-local variable management scheme.""" +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import weakref + +from tensorflow.python import pywrap_tensorflow +from tensorflow.python.eager import context +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_io_ops as io_ops +from tensorflow.python.util import nest + +# A key indicating a variable's value in an object's checkpointed Tensors +# (Checkpointable._gather_tensors_for_checkpoint). If this is the only key and +# the object has no dependencies, then its value may be restored on object +# creation (avoiding double assignment when executing eagerly). +VARIABLE_VALUE_KEY = "VARIABLE_VALUE" + +_CheckpointableReference = collections.namedtuple( + "_CheckpointableReference", + [ + # The local name for this dependency. + "name", + # The Checkpointable object being referenced. + "ref" + ]) + + +class CheckpointInitialValue(ops.Tensor): + """Tensor wrapper for managing update UIDs in `Variables`. + + When supplied as an initial value, objects of this type let a `Variable` + (`Variable`, `ResourceVariable`, etc.) know the UID of the restore the initial + value came from. This allows deferred restorations to be sequenced in the + order the user specified them, and lets us fall back on assignment if an + initial value is not set (e.g. due to a custom getter interfering). + + See comments in _add_variable_with_custom_getter for more information about + how `CheckpointInitialValue` is used. + """ + + def __init__(self, checkpoint_position, shape=None): + self.wrapped_value = checkpoint_position.restore_ops()[ + VARIABLE_VALUE_KEY] + if shape: + # We need to set the static shape information on the initializer if + # possible so we don't get a variable with an unknown shape. + self.wrapped_value.set_shape(shape) + self._checkpoint_position = checkpoint_position + + @property + def __class__(self): + return (self.wrapped_value.__class__, CheckpointInitialValue) + + def __getattr__(self, attr): + try: + return getattr(self.wrapped_value, attr) + except AttributeError: + return self.__getattribute__(attr) + + @property + def checkpoint_position(self): + return self._checkpoint_position + + +class _CheckpointPosition(object): + """Indicates a position within a `_Checkpoint`.""" + + def __init__(self, checkpoint, proto_id): + """Specify an object within a checkpoint. + + Args: + checkpoint: A _Checkpoint object. + proto_id: The index of this object in CheckpointableObjectGraph.nodes. + """ + self._checkpoint = checkpoint + self._proto_id = proto_id + + def restore(self, checkpointable): + """Restore this value into `checkpointable`.""" + if self.bind_object(checkpointable): + # This object's correspondence with a checkpointed object is new, so + # process deferred restorations for it and its dependencies. + restore_ops = checkpointable._restore_from_checkpoint_position(self) # pylint: disable=protected-access + session = self._checkpoint.session + if session: + session.run(restore_ops) + + def bind_object(self, checkpointable): + """Set a checkpoint<->object correspondence and process slot variables. + + Args: + checkpointable: The object to record a correspondence for. + Returns: + True if this is a new assignment, False if this object has already been + mapped to a checkpointed `Object` proto. + Raises: + AssertionError: If another object is already bound to the `Object` proto. + """ + checkpoint = self.checkpoint + current_assignment = checkpoint.object_by_proto_id.get(self._proto_id, None) + if current_assignment is None: + checkpoint.object_by_proto_id[self._proto_id] = checkpointable + for deferred_slot_restoration in ( + checkpoint.deferred_slot_restorations.pop(self._proto_id, ())): + checkpointable._process_slot_restoration( # pylint: disable=protected-access + slot_variable_position=_CheckpointPosition( + checkpoint=checkpoint, + proto_id=deferred_slot_restoration.slot_variable_id), + variable=deferred_slot_restoration.original_variable, + slot_name=deferred_slot_restoration.slot_name) + for slot_restoration in checkpoint.slot_restorations.get( + self._proto_id, ()): + optimizer_object = checkpoint.object_by_proto_id.get( + slot_restoration.optimizer_id, None) + if optimizer_object is None: + # The optimizer has not yet been created or tracked. Record in the + # checkpoint that the slot variables need to be restored when it is. + checkpoint.deferred_slot_restorations.setdefault( + slot_restoration.optimizer_id, []).append( + _DeferredSlotVariableRestoration( + original_variable=checkpointable, + slot_variable_id=slot_restoration.slot_variable_id, + slot_name=slot_restoration.slot_name)) + else: + optimizer_object._process_slot_restoration( # pylint: disable=protected-access + slot_variable_position=_CheckpointPosition( + checkpoint=checkpoint, + proto_id=slot_restoration.slot_variable_id), + variable=checkpointable, + slot_name=slot_restoration.slot_name) + return True # New assignment + else: + # The object was already mapped for this checkpoint load, which means + # we don't need to do anything besides check that the mapping is + # consistent (if the dependency DAG is not a tree then there are + # multiple paths to the same object). + if current_assignment is not checkpointable: + raise AssertionError( + ("Unable to load the checkpoint into this object graph. Either " + "the Checkpointable object references in the Python program " + "have changed in an incompatible way, or the checkpoint was " + "generated in an incompatible program.\n\nTwo checkpoint " + "references resolved to different objects (%s and %s).") + % (current_assignment, checkpointable)) + return False # Not a new assignment + + def is_simple_variable(self): + """Determine whether this value is restorable with a Tensor initializer.""" + attributes = self.object_proto.attributes + return (len(attributes) == 1 + and attributes[0].name == VARIABLE_VALUE_KEY + and not self.object_proto.children) + + def restore_ops(self): + """Create restore ops for this object's attributes.""" + restore_tensors = {} + for serialized_tensor in self.object_proto.attributes: + checkpoint_key = serialized_tensor.checkpoint_key + dtype = self._checkpoint.dtype_map[checkpoint_key] + base_type = dtype.base_dtype + with ops.init_scope(): + restore, = io_ops.restore_v2( + prefix=self._checkpoint.save_path, + tensor_names=[checkpoint_key], + shape_and_slices=[""], + dtypes=[base_type], + name="%s_checkpoint_read" % (serialized_tensor.name,)) + restore_tensors[serialized_tensor.name] = restore + return restore_tensors + + @property + def checkpoint(self): + return self._checkpoint + + @property + def checkpointable(self): + return self._checkpoint.object_by_proto_id[self._proto_id] + + @property + def object_proto(self): + return self._checkpoint.object_graph_proto.nodes[self._proto_id] + + @property + def restore_uid(self): + return self._checkpoint.restore_uid + + def __repr__(self): + return repr(self.object_proto) + + +_DeferredSlotVariableRestoration = collections.namedtuple( + "_DeferredSlotVariableRestoration", + [ + "original_variable", + "slot_variable_id", + "slot_name", + ] +) + +_SlotVariableRestoration = collections.namedtuple( + "_SlotVariableRestoration", + [ + # The checkpoint proto id of the optimizer object. + "optimizer_id", + # The checkpoint proto id of the slot variable. + "slot_variable_id", + "slot_name", + ]) + + +class _Checkpoint(object): + """Holds the status of an object-based checkpoint load.""" + + def __init__(self, object_graph_proto, save_path, session): + """Specify the checkpoint being loaded. + + Args: + object_graph_proto: The CheckpointableObjectGraph protocol buffer + associated with this checkpoint. + save_path: The path to the checkpoint, as returned by + `tf.train.latest_checkpoint`. + session: The session to evaluate assignment ops in. Should be None if + executing eagerly. + + Raises: + ValueError: If `session` is not None and eager execution is enabled. + """ + self.object_graph_proto = object_graph_proto + self.restore_uid = ops.uid() + # Dictionary mapping from an id in the protocol buffer flat array to + # Checkpointable Python objects. This mapping may be deferred if a + # checkpoint is restored before all dependencies have been tracked. Uses + # weak references so that partial restorations don't create reference cycles + # (as objects with deferred dependencies will generally have references to + # this object). + self.object_by_proto_id = weakref.WeakValueDictionary() + self.save_path = save_path + reader = pywrap_tensorflow.NewCheckpointReader(save_path) + self.dtype_map = reader.get_variable_to_dtype_map() + # A mapping from optimizer proto ids to lists of slot variables to be + # restored when the optimizer is tracked. Only includes slot variables whose + # regular variables have already been created, and only for optimizer + # objects which have not yet been created/tracked. + self.deferred_slot_restorations = {} + # A mapping from variable proto ids to lists of slot variables to be + # restored when the variable is created/tracked. These get shifted over to + # deferred_slot_restorations if the optimizer hasn't been created when that + # happens. + self.slot_restorations = {} + for node_index, node in enumerate(self.object_graph_proto.nodes): + for slot_reference in node.slot_variables: + # `node` refers to an `Optimizer`, since only these have slot variables. + self.slot_restorations.setdefault( + slot_reference.original_variable_node_id, []).append( + _SlotVariableRestoration( + optimizer_id=node_index, + slot_variable_id=slot_reference.slot_variable_node_id, + slot_name=slot_reference.slot_name)) + if session is not None and context.in_eager_mode(): + raise ValueError( + "Passed a session %s when executing eagerly." % (session,)) + self.session = session + + +class Checkpointable(object): + """Manages dependencies on other objects. + + `Checkpointable` objects may have dependencies: other `Checkpointable` objects + which should be saved if the object declaring the dependency is saved. A + correctly saveable program has a dependency graph such that if changing a + global variable affects an object (e.g. changes the behavior of any of its + methods) then there is a chain of dependencies from the influenced object to + the variable. + + Dependency edges have names, and are created implicitly when a + `Checkpointable` object is assigned to an attribute of another + `Checkpointable` object. For example: + + ``` + obj = Checkpointable() + obj.v = ResourceVariable(0.) + ``` + + The `Checkpointable` object `obj` now has a dependency named "v" on a + variable. + + `Checkpointable` objects may specify `Tensor`s to be saved and restored + directly (e.g. a `Variable` indicating how to save itself) rather than through + dependencies on other objects. See + `Checkpointable._scatter_tensors_from_checkpoint` and + `Checkpointable._gather_tensors_for_checkpoint` for details. + """ + + def _maybe_initialize_checkpointable(self): + """Initialize dependency management. + + Not __init__, since most objects will forget to call it. + """ + if hasattr(self, "_checkpoint_dependencies"): + # __init__ already called. This check means that we don't need + # Checkpointable.__init__() in the constructor of every TensorFlow object. + return + # A list of _CheckpointableReference objects. + self._checkpoint_dependencies = [] + # Maps names -> Checkpointable objects + self._dependency_names = {} + # Restorations for other Checkpointable objects on which this object may + # eventually depend. + self._deferred_dependencies = {} # local name -> _CheckpointPosition list + # The UID of the highest assignment to this object. Used to ensure that the + # last requested assignment determines the final value of an object. + if hasattr(self, "_update_uid"): + raise AssertionError( + "Internal error: the object had an update UID set before its " + "initialization code was run.") + self._update_uid = -1 + + def __setattr__(self, name, value): + """Support self.foo = checkpointable syntax.""" + # Perform the attribute assignment, and potentially call other __setattr__ + # overrides such as that for tf.keras.Model. + super(Checkpointable, self).__setattr__(name, value) + if isinstance(value, Checkpointable): + self._track_checkpointable( + value, name=name, + # Allow the user to switch the Checkpointable which is tracked by this + # name, since assigning a new variable to an attribute has + # historically been fine (e.g. Adam did this). + # TODO(allenl): Should this be a warning once Checkpointable save/load + # is usable? + overwrite=True) + + def _add_variable_with_custom_getter( + self, name, shape=None, dtype=dtypes.float32, + initializer=None, getter=None, **kwargs_for_getter): + """Restore-on-create for a variable be saved with this `Checkpointable`. + + If the user has requested that this object or another `Checkpointable` which + depends on this object be restored from a checkpoint (deferred loading + before variable object creation), `initializer` may be ignored and the value + from the checkpoint used instead. + + Args: + name: A name for the variable. Must be unique within this object. + shape: The shape of the variable. + dtype: The data type of the variable. + + initializer: The initializer to use. Ignored if there is a deferred + restoration left over from a call to + `_restore_from_checkpoint_position`. + + getter: The getter to wrap which actually fetches the variable. + **kwargs_for_getter: Passed to the getter. + + Returns: + The new variable object. + + Raises: + ValueError: If the variable name is not unique. + """ + self._maybe_initialize_checkpointable() + if name in self._dependency_names: + raise ValueError( + ("A variable named '%s' already exists in this Checkpointable, but " + "Checkpointable._add_variable called to create another with " + "that name. Variable names must be unique within a Checkpointable " + "object.") % (name,)) + # If this is a variable with a single Tensor stored in the checkpoint, we + # can set that value as an initializer rather than initializing and then + # assigning (when executing eagerly). + checkpoint_initializer = self._preload_simple_restoration( + name=name, shape=shape) + if (checkpoint_initializer is not None + and not ( + isinstance(initializer, CheckpointInitialValue) + and initializer.restore_uid > checkpoint_initializer.restore_uid)): + # If multiple Checkpointable objects are "creating" the same variable via + # the magic of custom getters, the one with the highest restore UID (the + # one called last) has to make the final initializer. If another custom + # getter interrupts this process by overwriting the initializer, then + # we'll catch that when we call _track_checkpointable. So this is "best + # effort" to set the initializer with the highest restore UID. + initializer = checkpoint_initializer + shape = None + checkpoint_position = checkpoint_initializer.checkpoint_position + else: + checkpoint_position = None + + new_variable = getter( + name=name, shape=shape, dtype=dtype, initializer=initializer, + **kwargs_for_getter) + + if (checkpoint_position is not None + and hasattr(new_variable, "_update_uid") + and new_variable._update_uid == checkpoint_position.restore_uid): # pylint: disable=protected-access + session = checkpoint_position.checkpoint.session + if session: + session.run(new_variable.initializer) + # If we set an initializer and the variable processed it, tracking will not + # assign again. It will add this variable to our dependencies, and if there + # is a non-trivial restoration queued, it will handle that. This also + # handles slot variables. + return self._track_checkpointable(new_variable, name=name) + + def _preload_simple_restoration(self, name, shape): + """Return a dependency's value for restore-on-create. + + Note the restoration is not deleted; if for some reason preload is called + and then not assigned to the variable (for example because a custom getter + overrides the initializer), the assignment will still happen once the + variable is tracked (determined based on checkpoint.restore_uid). + + Args: + name: The object-local name of the dependency holding the variable's + value. + shape: The shape of the variable being loaded into. + Returns: + An callable for use as a variable's initializer/initial_value, or None if + one should not be set (either because there was no variable with this name + in the checkpoint or because it needs more complex deserialization). Any + non-trivial deserialization will happen when the variable object is + tracked. + """ + deferred_dependencies_list = self._deferred_dependencies.get(name, ()) + if not deferred_dependencies_list: + # Nothing to do; we don't have a restore for this dependency queued up. + return + for checkpoint_position in deferred_dependencies_list: + if not checkpoint_position.is_simple_variable(): + # If _any_ pending restoration is too complicated to fit in an + # initializer (because it has dependencies, or because there are + # multiple Tensors to restore), bail and let the general tracking code + # handle it. + return None + checkpoint_position = max( + deferred_dependencies_list, + key=lambda restore: restore.checkpoint.restore_uid) + return CheckpointInitialValue( + checkpoint_position=checkpoint_position, shape=shape) + + def _track_checkpointable(self, checkpointable, name, overwrite=False): + """Declare a dependency on another `Checkpointable` object. + + Indicates that checkpoints for this object should include variables from + `checkpointable`. + + Variables in a checkpoint are mapped to `Checkpointable`s based on names if + provided when the checkpoint was written, but otherwise use the order those + `Checkpointable`s were declared as dependencies. + + To avoid breaking existing checkpoints when modifying a class, neither + variable names nor dependency names (the names passed to + `track_checkpointable`) may change. + + Args: + checkpointable: A `Checkpointable` which this object depends on. + name: A local name for `checkpointable`, used for loading checkpoints into + the correct objects. + overwrite: Boolean, whether silently replacing dependencies is OK. Used + for __setattr__, where throwing an error on attribute reassignment would + be inappropriate. + + Returns: + `checkpointable`, for convenience when declaring a dependency and + assigning to a member variable in one statement. + + Raises: + TypeError: If `checkpointable` does not inherit from `Checkpointable`. + ValueError: If another object is already tracked by this name. + """ + self._maybe_initialize_checkpointable() + if not isinstance(checkpointable, Checkpointable): + raise TypeError( + ("Checkpointable._track_checkpointable() passed type %s, not a " + "Checkpointable.") % (type(checkpointable),)) + new_reference = _CheckpointableReference(name=name, ref=checkpointable) + if (name in self._dependency_names + and self._dependency_names[name] is not checkpointable): + if not overwrite: + raise ValueError( + ("Called Checkpointable._track_checkpointable() with name='%s', " + "but a Checkpointable with this name is already declared as a " + "dependency. Names must be unique (or overwrite=True).") % (name,)) + # This is a weird thing to do, but we're not going to stop people from + # using __setattr__. + for index, (old_name, _) in enumerate(self._checkpoint_dependencies): + if name == old_name: + self._checkpoint_dependencies[index] = new_reference + else: + self._checkpoint_dependencies.append(new_reference) + + self._dependency_names[name] = checkpointable + deferred_dependency_list = self._deferred_dependencies.pop(name, None) + if deferred_dependency_list is not None: + for checkpoint_position in deferred_dependency_list: + checkpoint_position.restore(checkpointable=checkpointable) + return checkpointable + + def _restore_from_checkpoint_position(self, checkpoint_position): + """Restore this object and its dependencies (may be deferred).""" + # Attempt a breadth-first traversal, since presumably the user has more + # control over shorter paths. If we don't have all of the dependencies at + # this point, the end result is not breadth-first (since other deferred + # traversals will happen later). + visit_queue = collections.deque([checkpoint_position]) + restore_ops = [] + while visit_queue: + current_position = visit_queue.popleft() + restore_ops.extend(nest.flatten( + current_position.checkpointable # pylint: disable=protected-access + ._single_restoration_from_checkpoint_position( + checkpoint_position=current_position, + visit_queue=visit_queue))) + return restore_ops + + def _single_restoration_from_checkpoint_position( + self, checkpoint_position, visit_queue): + """Restore this object, and either queue its dependencies or defer them.""" + self._maybe_initialize_checkpointable() + checkpoint = checkpoint_position.checkpoint + # If the UID of this restore is lower than our current update UID, we don't + # need to actually restore the object. However, we should pass the + # restoration on to our dependencies. + if checkpoint.restore_uid > self._update_uid: + restore_op = self._scatter_tensors_from_checkpoint( + checkpoint_position.restore_ops()) + self._update_uid = checkpoint.restore_uid + else: + restore_op = () + for child in checkpoint_position.object_proto.children: + child_position = _CheckpointPosition( + checkpoint=checkpoint, + proto_id=child.node_id) + local_object = self._dependency_names.get(child.local_name, None) + if local_object is None: + # We don't yet have a dependency registered with this name. Save it + # in case we do. + self._deferred_dependencies.setdefault(child.local_name, []).append( + child_position) + else: + if child_position.bind_object(checkpointable=local_object): + # This object's correspondence is new, so dependencies need to be + # visited. Delay doing it so that we get a breadth-first dependency + # resolution order (shallowest paths first). The caller is responsible + # for emptying visit_queue. + visit_queue.append(child_position) + return restore_op + + def _scatter_tensors_from_checkpoint(self, attributes): + """Restores this object from a checkpoint. + + Args: + attributes: A dictionary of Tensors, with key corresponding to those + returned from _gather_tensors_for_checkpoint. + Returns: + A restore op to run (if graph building). + """ + if attributes: + raise AssertionError( + ("A Checkpointable object which was not expecting any data received " + "some from a checkpoint. (Got %s)") % (attributes,)) + return () # No restore ops + + def _gather_tensors_for_checkpoint(self): + """Returns a dictionary of Tensors to save with this object.""" + return {} diff --git a/tensorflow/python/training/checkpointable_test.py b/tensorflow/python/training/checkpointable_test.py new file mode 100644 index 0000000000..e79acb4975 --- /dev/null +++ b/tensorflow/python/training/checkpointable_test.py @@ -0,0 +1,39 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.platform import test +from tensorflow.python.training import checkpointable + + +class InterfaceTests(test.TestCase): + + def testMultipleAssignment(self): + root = checkpointable.Checkpointable() + root.leaf = checkpointable.Checkpointable() + root.leaf = root.leaf + duplicate_name_dep = checkpointable.Checkpointable() + with self.assertRaises(ValueError): + root._track_checkpointable(duplicate_name_dep, name="leaf") + # No error; we're overriding __setattr__, so we can't really stop people + # from doing this while maintaining backward compatibility. + root.leaf = duplicate_name_dep + root._track_checkpointable(duplicate_name_dep, name="leaf", overwrite=True) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index f05c40b32d..762658175a 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -34,6 +34,7 @@ from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables +from tensorflow.python.training import checkpointable from tensorflow.python.training import slot_creator from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @@ -212,7 +213,7 @@ def _get_processor(v): @tf_export("train.Optimizer") -class Optimizer(object): +class Optimizer(checkpointable.Checkpointable): """Base class for optimizers. This class defines the API to add Ops to train a model. You never use this @@ -924,3 +925,47 @@ class Optimizer(object): if _var_key(var) not in named_slots: named_slots[_var_key(var)] = slot_creator.create_zeros_slot(var, op_name) return named_slots[_var_key(var)] + + def _process_slot_restoration( + self, slot_variable_position, slot_name, variable): + """Restore a slot variable's value (creating it if necessary). + + Args: + slot_variable_position: A `checkpointable._CheckpointPosition` object + indicating the slot variable `Checkpointable` object to be restored. + slot_name: The name of this `Optimizer`'s slot to restore into. + variable: The variable object this slot is being created for. + """ + named_slots = self._slot_dict(slot_name) + variable_key = _var_key(variable) + slot_variable = named_slots.get(variable_key, None) + if slot_variable is None: + if slot_variable_position.is_simple_variable(): + initializer = checkpointable.CheckpointInitialValue( + checkpoint_position=slot_variable_position) + slot_variable = self._get_or_make_slot( + var=variable, + val=initializer, + slot_name=slot_name, + op_name=self._name) + if slot_variable._update_uid == slot_variable_position.restore_uid: # pylint: disable=protected-access + # If our restoration was set (not given with custom getters), run + # it. Otherwise wait for the restore() call below to restore if + # necessary. + session = slot_variable_position.checkpoint.session + if session: + session.run(slot_variable.initializer) + + else: + raise NotImplementedError( + "Currently only variables with no dependencies can be loaded as " + "slot variables. File a feature request if this limitation bothers " + "you. (Got %s)" % (slot_variable_position,)) + # Slot variables are not owned by any one object (because we don't want to + # save the slot variable if the optimizer is saved without the non-slot + # variable, or if the non-slot variable is saved without the optimizer; + # it's a dependency hypergraph with edges of the form (optimizer, non-slot + # variable, variable)). So we don't _track_ slot variables anywhere, and + # instead special-case this dependency and otherwise pretend it's a normal + # graph. + slot_variable_position.restore(slot_variable) diff --git a/tensorflow/tools/api/golden/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/tensorflow.-variable.pbtxt index bc7cf7267f..069200065a 100644 --- a/tensorflow/tools/api/golden/tensorflow.-variable.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-variable.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.Variable" tf_class { is_instance: "" + is_instance: "" is_instance: "" member { name: "SaveSliceInfo" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt index 863beaea4c..4eea52596a 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.AdadeltaOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt index 0a7aa9b6bc..5aaaf0e20b 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.AdagradDAOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt index 83724fea55..7f1201879c 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.AdagradOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt index e285b27a05..503c439d83 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.AdamOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt index fc28577d6e..39c071748c 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.FtrlOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt index bf3c1d81f8..6b441786ca 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.GradientDescentOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt index a640c8d2c6..80f3963bac 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.MomentumOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt index 6b33c236a3..c880ba328a 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.train.Optimizer" tf_class { is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt index d23fcaed7b..6acdf35f78 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.ProximalAdagradOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt index b6c03e71d9..00b1e309e3 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.ProximalGradientDescentOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt index 4a82db11cb..05dc391cab 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.RMSPropOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt index e9131bf544..4be2819261 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.SyncReplicasOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index d9d7929959..791016e8b7 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -149,7 +149,7 @@ sh_binary( "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", "//tensorflow/contrib/data/python/ops:contrib_op_loader", "//tensorflow/contrib/eager/python/examples:examples_pip", - "//tensorflow/contrib/eager/python:checkpointable", + "//tensorflow/contrib/eager/python:checkpointable_utils", "//tensorflow/contrib/eager/python:evaluator", "//tensorflow/contrib/gan:gan", "//tensorflow/contrib/graph_editor:graph_editor_pip", -- GitLab From 7905d2ae09e20ce628773f319229e21202d4379a Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 15 Feb 2018 14:08:54 -0800 Subject: [PATCH 237/629] Update tf.keras to version 2.1.4. PiperOrigin-RevId: 185897606 --- .../keras/applications/imagenet_utils.py | 3 ++- .../_impl/keras/applications/mobilenet.py | 4 ++-- .../python/keras/_impl/keras/constraints.py | 3 ++- .../keras/_impl/keras/datasets/cifar.py | 21 +++++++++---------- .../python/keras/_impl/keras/datasets/imdb.py | 6 ++---- .../python/keras/_impl/keras/initializers.py | 3 ++- .../keras/layers/convolutional_recurrent.py | 4 ++-- .../python/keras/_impl/keras/layers/local.py | 4 ++-- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py index d9cb726137..c26a28ed40 100644 --- a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py +++ b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py @@ -234,7 +234,8 @@ def decode_predictions(preds, top=5): CLASS_INDEX_PATH, cache_subdir='models', file_hash='c2c37ea517e94d9795004a39431a14cb') - CLASS_INDEX = json.load(open(fpath)) + with open(fpath) as f: + CLASS_INDEX = json.load(f) results = [] for pred in preds: top_indices = pred.argsort()[-top:][::-1] diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py index 027ae26113..1bbbedb85e 100644 --- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py +++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py @@ -561,7 +561,7 @@ def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): and width and height should be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value. filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. @@ -627,7 +627,7 @@ def _depthwise_conv_block(inputs, (with `channels_last` data format) or (channels, rows, cols) (with `channels_first` data format). pointwise_conv_filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the pointwise convolution). + (i.e. the number of output filters in the pointwise convolution). alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. diff --git a/tensorflow/python/keras/_impl/keras/constraints.py b/tensorflow/python/keras/_impl/keras/constraints.py index ab62d575e3..271fbbb63d 100644 --- a/tensorflow/python/keras/_impl/keras/constraints.py +++ b/tensorflow/python/keras/_impl/keras/constraints.py @@ -202,4 +202,5 @@ def get(identifier): elif callable(identifier): return identifier else: - raise ValueError('Could not interpret constraint identifier:', identifier) + raise ValueError('Could not interpret constraint identifier: ' + + str(identifier)) diff --git a/tensorflow/python/keras/_impl/keras/datasets/cifar.py b/tensorflow/python/keras/_impl/keras/datasets/cifar.py index 7ada3340a5..02344897f7 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/cifar.py +++ b/tensorflow/python/keras/_impl/keras/datasets/cifar.py @@ -34,17 +34,16 @@ def load_batch(fpath, label_key='labels'): Returns: A tuple `(data, labels)`. """ - f = open(fpath, 'rb') - if sys.version_info < (3,): - d = cPickle.load(f) - else: - d = cPickle.load(f, encoding='bytes') - # decode utf8 - d_decoded = {} - for k, v in d.items(): - d_decoded[k.decode('utf8')] = v - d = d_decoded - f.close() + with open(fpath, 'rb') as f: + if sys.version_info < (3,): + d = cPickle.load(f) + else: + d = cPickle.load(f, encoding='bytes') + # decode utf8 + d_decoded = {} + for k, v in d.items(): + d_decoded[k.decode('utf8')] = v + d = d_decoded data = d['data'] labels = d[label_key] diff --git a/tensorflow/python/keras/_impl/keras/datasets/imdb.py b/tensorflow/python/keras/_impl/keras/datasets/imdb.py index e2dddf7730..7467bb2464 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/imdb.py +++ b/tensorflow/python/keras/_impl/keras/datasets/imdb.py @@ -144,7 +144,5 @@ def get_word_index(path='imdb_word_index.json'): path, origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.json', file_hash='bfafd718b763782e994055a2d397834f') - f = open(path) - data = json.load(f) - f.close() - return data + with open(path) as f: + return json.load(f) diff --git a/tensorflow/python/keras/_impl/keras/initializers.py b/tensorflow/python/keras/_impl/keras/initializers.py index 338c669f97..300bed5e14 100644 --- a/tensorflow/python/keras/_impl/keras/initializers.py +++ b/tensorflow/python/keras/_impl/keras/initializers.py @@ -209,4 +209,5 @@ def get(identifier): elif callable(identifier): return identifier else: - raise ValueError('Could not interpret initializer identifier:', identifier) + raise ValueError('Could not interpret initializer identifier: ' + + str(identifier)) diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py index a04c3a24bf..d2792b9636 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py @@ -39,7 +39,7 @@ class ConvRecurrent2D(Recurrent): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of n integers, specifying the dimensions of the convolution window. strides: An integer or tuple/list of n integers, @@ -200,7 +200,7 @@ class ConvLSTM2D(ConvRecurrent2D): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of n integers, specifying the dimensions of the convolution window. strides: An integer or tuple/list of n integers, diff --git a/tensorflow/python/keras/_impl/keras/layers/local.py b/tensorflow/python/keras/_impl/keras/layers/local.py index 798ac236a3..df0efe6b8b 100644 --- a/tensorflow/python/keras/_impl/keras/layers/local.py +++ b/tensorflow/python/keras/_impl/keras/layers/local.py @@ -53,7 +53,7 @@ class LocallyConnected1D(Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of a single integer, specifying the length of the 1D convolution window. strides: An integer or tuple/list of a single integer, @@ -222,7 +222,7 @@ class LocallyConnected2D(Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. Can be a single integer to specify the same value for -- GitLab From a805116366eddcaa8eb6a602398f8efae076e0b5 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 15 Feb 2018 14:24:40 -0800 Subject: [PATCH 238/629] [tf.data] Return OK and set `*end_of_sequence = true` when repeating an empty dataset. Returning an error status could lead to situations (like `empty_ds.repeat(None).interleave(...)`) where the wrong exception was raised. This change ensures that the proper `OutOfRangeError` is raised in the user program. PiperOrigin-RevId: 185900119 --- tensorflow/core/kernels/data/repeat_dataset_op.cc | 11 +++++------ tensorflow/core/kernels/data/shuffle_dataset_op.cc | 11 +++++------ .../kernel_tests/interleave_dataset_op_test.py | 14 ++++++++++++++ .../data/kernel_tests/sequence_dataset_op_test.py | 4 +--- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/kernels/data/repeat_dataset_op.cc b/tensorflow/core/kernels/data/repeat_dataset_op.cc index 1cb533158b..d37086541d 100644 --- a/tensorflow/core/kernels/data/repeat_dataset_op.cc +++ b/tensorflow/core/kernels/data/repeat_dataset_op.cc @@ -187,12 +187,11 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { } else { input_impl_.reset(); if (first_call) { - // If the first call to GetNext() fails because the end of - // sequence has been reached, we return an OutOfRange error to - // terminate the iteration. (Otherwise, this iterator would loop - // infinitely and never produce a value.) - return errors::OutOfRange( - "Attempted to repeat an empty dataset infinitely."); + // If the first call to GetNext() fails because the end + // of sequence has been reached, we terminate the + // iteration immediately. (Otherwise, this iterator + // would loop infinitely and never produce a value.) + return Status::OK(); } } } while (true); diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index 1dde236c17..2f6bf83da5 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -104,13 +104,12 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { break; } if (first_call && dataset()->count_ == -1) { - // If the first call to GetNext() fails because the end of - // sequence has been reached, we return an OutOfRange error to - // terminate the iteration. (Otherwise, this iterator may loop - // infinitely and never produce a value.) + // If the first call to GetNext() fails because the end + // of sequence has been reached, we terminate the + // iteration immediately. (Otherwise, this iterator + // would loop infinitely and never produce a value.) *end_of_sequence = true; - return errors::OutOfRange( - "Attempted to repeat an empty dataset infinitely."); + return Status::OK(); } epoch_++; int64 n = slices_.back()->end; diff --git a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py index 28cb50c002..7dbf7268d7 100644 --- a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py @@ -201,6 +201,20 @@ class InterleaveDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testEmptyInput(self): + iterator = ( + dataset_ops.Dataset.from_tensor_slices([]) + .repeat(None) + .interleave(dataset_ops.Dataset.from_tensors, cycle_length=2) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py b/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py index ae08032e19..1d27b036eb 100644 --- a/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py @@ -201,9 +201,7 @@ class SequenceDatasetTest(test.TestCase): with self.test_session() as sess: sess.run(init_op) - with self.assertRaisesRegexp( - errors.OutOfRangeError, - "Attempted to repeat an empty dataset infinitely."): + with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) -- GitLab From 66f4f4cf31b86b7dd20f10ce6d968348b502f2ee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 14:25:00 -0800 Subject: [PATCH 239/629] Automated g4 rollback of changelist 185072479 PiperOrigin-RevId: 185900165 --- .../grappler/optimizers/constant_folding.cc | 36 +++++++++++++++---- .../grappler/optimizers/constant_folding.h | 2 ++ .../optimizers/constant_folding_test.cc | 30 +++++++--------- tensorflow/core/kernels/snapshot_op.h | 17 +++++---- tensorflow/python/grappler/cluster_test.py | 4 +-- 5 files changed, 58 insertions(+), 31 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 1e6f11c8aa..8f89f2ae64 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1375,6 +1375,29 @@ void ConstantFolding::ReplaceOperationWithIdentity(int input_to_forward, graph_modified_ = true; } +void ConstantFolding::ReplaceOperationWithSnapshot(int input_to_forward, + NodeDef* node, + GraphDef* graph) { + node->set_op("Snapshot"); + DataType dtype = node->attr().at("T").type(); + node->clear_attr(); + (*node->mutable_attr())["T"].set_type(dtype); + + // Propagate the designated input through the Snapshot. + node->mutable_input()->SwapElements(0, input_to_forward); + // Add all other inputs as control dependencies. + for (int i = 1; i < node->input_size(); ++i) { + if (IsControlInput(node->input(i))) { + break; + } + const string ctrl_dep = + AddControlDependency(node->input(i), graph, node_map_.get()); + node_map_->UpdateInput(node->name(), node->input(i), ctrl_dep); + node->set_input(i, ctrl_dep); + } + graph_modified_ = true; +} + void ConstantFolding::ReplaceDivisionOfOnesByReciprocal(NodeDef* node, GraphDef* graph) { node->set_op("Reciprocal"); @@ -1443,15 +1466,14 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, graph_modified_ = true; continue; } - const bool safe_to_use_shapes = - use_shape_info && (feed_nodes_.empty() || is_aggressive); + const bool is_mul = IsMul(*node); const bool is_matmul = IsMatMul(*node); const bool is_add = IsAdd(*node) || IsBiasAdd(*node); const bool is_sub = IsSub(*node); const bool is_any_div = IsAnyDiv(*node); // Simplify arithmetic operations with ones or zeros. - if (safe_to_use_shapes && + if (use_shape_info && (is_mul || is_matmul || is_add || is_sub || is_any_div) && properties.HasInputProperties(node->name()) && properties.HasOutputProperties(node->name())) { @@ -1475,7 +1497,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, ((is_mul && x_is_one) || (is_add && x_is_zero))) { // TODO(rmlarsen): Handle subtraction 0 - y. // 1 * y = y or 0 + y = y. - ReplaceOperationWithIdentity(1, node, output); + ReplaceOperationWithSnapshot(1, node, output); continue; } @@ -1495,9 +1517,9 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape); if (x_matches_output_shape && (((is_mul || is_any_div) && y_is_one) || - ((is_add || is_sub) && y_is_zero && is_aggressive))) { + ((is_add || is_sub) && y_is_zero))) { // x * 1 = x or x / 1 = x or x +/- 0 = x - ReplaceOperationWithIdentity(0, node, output); + ReplaceOperationWithSnapshot(0, node, output); continue; } @@ -1690,6 +1712,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { + LOG(INFO) << "Graph before: " << item.graph.DebugString(); nodes_to_preserve_ = item.NodesToPreserve(); for (const auto& feed : item.feed) { feed_nodes_.insert(NodeName(feed.first)); @@ -1716,6 +1739,7 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, *output->mutable_library() = item.graph.library(); *output->mutable_versions() = item.graph.versions(); + LOG(INFO) << "Graph after: " << output->DebugString(); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 18acc91e8a..e4078514af 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -79,6 +79,8 @@ class ConstantFolding : public GraphOptimizer { bool IsZeros(const NodeDef& node) const; void ReplaceOperationWithIdentity(int input_to_forward, NodeDef* node, GraphDef* graph); + void ReplaceOperationWithSnapshot(int input_to_forward, NodeDef* node, + GraphDef* graph); Status ReplaceOperationWithConstant(double value, const TensorShapeProto& shape, NodeDef* node, GraphDef* graph); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 46998dcc91..d8df19fe6a 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -195,8 +195,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"addn", "matmul3", "matmul4"}; - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -214,11 +213,11 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("^zeros", node.input(0)); EXPECT_EQ("^y", node.input(1)); } else if (name == "mul3") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^ones", node.input(1)); } else if (name == "mul4") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("y", node.input(0)); EXPECT_EQ("^ones", node.input(1)); } else if (name == "mul5") { @@ -230,7 +229,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("^zeros_1d", node.input(0)); EXPECT_EQ("^y", node.input(1)); } else if (name == "div1") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^ones", node.input(1)); } else if (name == "div2") { @@ -266,15 +265,15 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ(2, t.tensor_shape().dim(0).size()); EXPECT_EQ(3, t.tensor_shape().dim(1).size()); } else if (name == "add1") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^zeros", node.input(1)); } else if (name == "add2") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("y", node.input(0)); EXPECT_EQ("^zeros", node.input(1)); } else if (name == "bias_add1") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^zeros_1d", node.input(1)); } else if (name == "bias_add2") { @@ -283,7 +282,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("zeros", node.input(0)); EXPECT_EQ("bias", node.input(1)); } else if (name == "sub1") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^zeros", node.input(1)); } else if (name == "sub2") { @@ -322,8 +321,7 @@ TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"div_f", "div_i", "realdiv"}; - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -413,8 +411,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -468,8 +465,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -1337,7 +1333,7 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); + ConstantFolding fold(nullptr /* cpu_device */); GraphDef output; Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -1398,7 +1394,7 @@ TEST_F(ConstantFoldingTest, MaterializeReductionIndices) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch.push_back("reshape"); - ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); + ConstantFolding fold(nullptr /* cpu_device */); GraphDef output; Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); diff --git a/tensorflow/core/kernels/snapshot_op.h b/tensorflow/core/kernels/snapshot_op.h index 2c79893b49..b94834f159 100644 --- a/tensorflow/core/kernels/snapshot_op.h +++ b/tensorflow/core/kernels/snapshot_op.h @@ -35,12 +35,17 @@ class SnapshotOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); Tensor* output = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, input.shape(), &output)); - const Device& device = context->eigen_device(); - device.memcpy(output->template flat().data(), - input.template flat().data(), - input.NumElements() * sizeof(Scalar)); + // Try to use buffer forwarding to avoid an explicit copy. + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {0}, 0, input.shape(), &output)); + if (!output->SharesBufferWith(input)) { + // We had to allocate a new buffer since the refcount on the input was + // greater than 1. Copy the input to the new buffer. + const Device& device = context->eigen_device(); + device.memcpy(output->template flat().data(), + input.template flat().data(), + input.NumElements() * sizeof(Scalar)); + } } }; diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py index 10d515a364..caae5b114e 100644 --- a/tensorflow/python/grappler/cluster_test.py +++ b/tensorflow/python/grappler/cluster_test.py @@ -45,7 +45,7 @@ class ClusterTest(test.TestCase): op_perfs, run_time, step_stats = grappler_cluster.MeasureCosts( grappler_item) self.assertTrue(run_time > 0) - self.assertEqual(len(op_perfs), 7) + self.assertEqual(len(op_perfs), 8) self.assertTrue(step_stats.dev_stats) def testNoDetailedStats(self): @@ -125,7 +125,7 @@ class ClusterTest(test.TestCase): disable_detailed_stats=False, disable_timeline=False) as gcluster: op_perfs, run_time, step_stats = gcluster.MeasureCosts(grappler_item) self.assertTrue(run_time > 0) - self.assertEqual(len(op_perfs), 7) + self.assertEqual(len(op_perfs), 8) self.assertTrue(step_stats.dev_stats) def testAvailableOps(self): -- GitLab From 41a8560e0c2fa3b8fa73622a15da53f5e1b8b6c8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 14:49:01 -0800 Subject: [PATCH 240/629] Implement Split PiperOrigin-RevId: 185904437 --- tensorflow/contrib/lite/builtin_op_data.h | 4 + tensorflow/contrib/lite/kernels/BUILD | 13 ++ .../internal/reference/reference_ops.h | 49 +++--- tensorflow/contrib/lite/kernels/register.cc | 2 + tensorflow/contrib/lite/kernels/split.cc | 159 ++++++++++++++++++ tensorflow/contrib/lite/kernels/split_test.cc | 147 ++++++++++++++++ tensorflow/contrib/lite/kernels/test_util.cc | 1 + tensorflow/contrib/lite/model.cc | 8 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 8 +- .../contrib/lite/schema/schema_generated.h | 141 +++++++++++++++- tensorflow/contrib/lite/testing/BUILD | 1 + .../contrib/lite/testing/generate_examples.py | 28 ++- .../testing/generated_examples_zip_test.cc | 1 + .../contrib/lite/toco/tflite/operator.cc | 23 ++- 15 files changed, 551 insertions(+), 35 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/split.cc create mode 100644 tensorflow/contrib/lite/kernels/split_test.cc diff --git a/tensorflow/contrib/lite/builtin_op_data.h b/tensorflow/contrib/lite/builtin_op_data.h index 5dbeadd165..5fc8954743 100644 --- a/tensorflow/contrib/lite/builtin_op_data.h +++ b/tensorflow/contrib/lite/builtin_op_data.h @@ -195,6 +195,10 @@ typedef struct { bool keep_dims; } TfLiteMeanParams; +typedef struct { + int num_splits; +} TfLiteSplitParams; + typedef struct { // TODO(ahentz): We can't have dynamic data in this struct, at least not yet. // For now we will fix the maximum possible number of dimensions. diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index d80c8bb671..b59dc5ffb3 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -129,6 +129,7 @@ cc_library( "skip_gram.cc", "space_to_batch_nd.cc", "space_to_depth.cc", + "split.cc", "squeeze.cc", "strided_slice.cc", "sub.cc", @@ -574,6 +575,18 @@ tf_cc_test( ], ) +tf_cc_test( + name = "split_test", + size = "small", + srcs = ["split_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "squeeze_test", size = "small", diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 2e0376656a..5f4d5be323 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1590,6 +1590,33 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, } } +template +void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, + int axis, int outputs_count, Scalar* const* output_data, + const Dims<4>* const* output_dims) { + const int batches = ArraySize(*output_dims[0], 3); + const int height = ArraySize(*output_dims[0], 2); + const int width = ArraySize(*output_dims[0], 1); + const int depth = ArraySize(*output_dims[0], 0); + + const int slice_size = ArraySize(*output_dims[0], axis); + + for (int i = 0; i < outputs_count; ++i) { + int offset = i * slice_size * input_dims.strides[axis]; + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + auto out = Offset(*output_dims[i], c, x, y, b); + auto in = Offset(input_dims, c, x, y, b); + output_data[i][out] = input_data[offset + in]; + } + } + } + } + } +} + template void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, int outputs_count, Scalar* const* output_data, @@ -1600,28 +1627,12 @@ void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, /* height = */ MatchingArraySize(*output_dims[i], 2, input_dims, 2); /* width = */ MatchingArraySize(*output_dims[i], 1, input_dims, 1); } - const int batches = MatchingArraySize(*output_dims[0], 3, input_dims, 3); - const int height = MatchingArraySize(*output_dims[0], 2, input_dims, 2); - const int width = MatchingArraySize(*output_dims[0], 1, input_dims, 1); // for now we dont have a model with a TensorFlowSplit // with fused activation function. TFLITE_DCHECK(Ac == FusedActivationFunctionType::kNone); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - int in_c = 0; - for (int i = 0; i < outputs_count; ++i) { - const int depth = ArraySize(*output_dims[i], 0); - for (int c = 0; c < depth; ++c) { - output_data[i][Offset(*output_dims[i], c, x, y, b)] = - input_data[Offset(input_dims, in_c, x, y, b)]; - in_c++; - } - } - TFLITE_DCHECK(in_c == ArraySize(input_dims, 0)); - } - } - } + + TensorFlowSplit(input_data, input_dims, /*axis=*/0, outputs_count, + output_data, output_dims); } // TODO(benoitjacob) make this a proper reference impl without Eigen! diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index fa870ddb40..edc4e26edb 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -58,6 +58,7 @@ TfLiteRegistration* Register_SPACE_TO_DEPTH(); TfLiteRegistration* Register_GATHER(); TfLiteRegistration* Register_TRANSPOSE(); TfLiteRegistration* Register_MEAN(); +TfLiteRegistration* Register_SPLIT(); TfLiteRegistration* Register_SQUEEZE(); TfLiteRegistration* Register_STRIDED_SLICE(); TfLiteRegistration* Register_EXP(); @@ -108,6 +109,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_MEAN, Register_MEAN()); AddBuiltin(BuiltinOperator_DIV, Register_DIV()); AddBuiltin(BuiltinOperator_SUB, Register_SUB()); + AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT()); AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE()); AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE()); AddBuiltin(BuiltinOperator_EXP, Register_EXP()); diff --git a/tensorflow/contrib/lite/kernels/split.cc b/tensorflow/contrib/lite/kernels/split.cc new file mode 100644 index 0000000000..b524c79f87 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/split.cc @@ -0,0 +1,159 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace split { + +struct OpContext { + OpContext(TfLiteContext* context, TfLiteNode* node) { + params = reinterpret_cast(node->builtin_data); + axis = GetInput(context, node, 0); + input = GetInput(context, node, 1); + } + TfLiteSplitParams* params; + TfLiteTensor* axis; + TfLiteTensor* input; +}; + +TfLiteStatus UseDynamicOutputTensors(TfLiteContext* context, TfLiteNode* node) { + for (int i = 0; i < NumOutputs(node); ++i) { + SetTensorToDynamic(GetOutput(context, node, i)); + } + return kTfLiteOk; +} + +TfLiteStatus ResizeOutputTensors(TfLiteContext* context, TfLiteNode* node, + TfLiteTensor* axis, TfLiteTensor* input, + int num_splits) { + int axis_value = GetTensorData(axis)[0]; + if (axis_value < 0) { + axis_value += NumDimensions(input); + } + + const int input_size = SizeOfDimension(input, axis_value); + TF_LITE_ENSURE_MSG(context, input_size % num_splits == 0, + "Not an even split"); + const int slice_size = input_size / num_splits; + + for (int i = 0; i < NumOutputs(node); ++i) { + TfLiteIntArray* output_dims = TfLiteIntArrayCopy(input->dims); + output_dims->data[axis_value] = slice_size; + TfLiteTensor* output = GetOutput(context, node, i); + TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, output, output_dims)); + } + + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + + OpContext op_context(context, node); + + TF_LITE_ENSURE_EQ(context, NumOutputs(node), op_context.params->num_splits); + + auto input_type = op_context.input->type; + TF_LITE_ENSURE(context, + input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8); + for (int i = 0; i < NumOutputs(node); ++i) { + GetOutput(context, node, i)->type = input_type; + } + + // If we know the contents of the 'axis' tensor, resize all outputs. + // Otherwise, wait until Eval(). + if (IsConstantTensor(op_context.axis)) { + return ResizeOutputTensors(context, node, op_context.axis, op_context.input, + op_context.params->num_splits); + } else { + return UseDynamicOutputTensors(context, node); + } +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); + + // When the 'axis' tensor is non-const we can't resize output tensors in + // Prepare(), and we have to do it now. + if (!IsConstantTensor(op_context.axis)) { + TF_LITE_ENSURE_OK( + context, + ResizeOutputTensors(context, node, op_context.axis, op_context.input, + op_context.params->num_splits)); + } + + int axis_value = GetTensorData(op_context.axis)[0]; + if (axis_value < 0) { + axis_value += NumDimensions(op_context.input); + } + axis_value = RemapDim(NumDimensions(op_context.input), axis_value); + + // TODO(ahentz): Our usage of VectorOfTensors could be optimized by + // calculating it in Prepare, unless we defer shape calculation. + // TODO(ahentz): We can improve the optimized_ops version to handle other + // cases too. +#define TF_LITE_SPLIT(scalar) \ + VectorOfTensors all_outputs(*context, *node->outputs); \ + if (axis_value == NumDimensions(op_context.input)) { \ + optimized_ops::TensorFlowSplit( \ + GetTensorData(op_context.input), \ + GetTensorDims(op_context.input), NumOutputs(node), all_outputs.data(), \ + all_outputs.dims()); \ + } else { \ + reference_ops::TensorFlowSplit( \ + GetTensorData(op_context.input), \ + GetTensorDims(op_context.input), axis_value, NumOutputs(node), \ + all_outputs.data(), all_outputs.dims()); \ + } + switch (op_context.input->type) { + case kTfLiteFloat32: { + TF_LITE_SPLIT(float); + break; + } + case kTfLiteUInt8: { + TF_LITE_SPLIT(uint8_t); + break; + } + default: + context->ReportError(context, + "Only float32 and uint8 are currently supported."); + return kTfLiteError; + } +#undef TF_LITE_SPLIT + + return kTfLiteOk; +} + +} // namespace split + +TfLiteRegistration* Register_SPLIT() { + static TfLiteRegistration r = {nullptr, nullptr, split::Prepare, split::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/split_test.cc b/tensorflow/contrib/lite/kernels/split_test.cc new file mode 100644 index 0000000000..61a0759c64 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/split_test.cc @@ -0,0 +1,147 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +constexpr int kAxisIsATensor = -1000; + +class SplitOpModel : public SingleOpModel { + public: + SplitOpModel(const TensorData& input, int num_splits, + int axis = kAxisIsATensor) { + if (axis == kAxisIsATensor) { + axis_ = AddInput({TensorType_INT32, {1}}); + } else { + axis_ = AddConstInput(TensorType_INT32, {axis}, {1}); + } + input_ = AddInput(input); + for (int i = 0; i < num_splits; ++i) { + outputs_.push_back(AddOutput(input.type)); + } + SetBuiltinOp(BuiltinOperator_SPLIT, BuiltinOptions_SplitOptions, + CreateSplitOptions(builder_, num_splits).Union()); + if (axis == kAxisIsATensor) { + BuildInterpreter({GetShape(axis_), GetShape(input_)}); + } else { + BuildInterpreter({{}, GetShape(input_)}); + } + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + void SetAxis(int axis) { PopulateTensor(axis_, {axis}); } + + std::vector GetOutput(int i) { + return ExtractVector(outputs_[i]); + } + std::vector GetOutputShape(int i) { return GetTensorShape(outputs_[i]); } + + private: + int input_; + int axis_; + std::vector outputs_; +}; + +using TensorValues = std::initializer_list; + +void Check(int axis, int num_splits, std::initializer_list input_shape, + std::initializer_list output_shape, + const TensorValues& input_data, + const std::vector& output_data) { + auto debug = [&](int i) { + std::stringstream ss; + ss << "for output tensor " << i << " axis=" << axis + << " and num_splits=" << num_splits; + return ss.str(); + }; + SplitOpModel m({TensorType_FLOAT32, input_shape}, num_splits); + m.SetInput(input_data); + m.SetAxis(axis); + m.Invoke(); + for (int i = 0; i < num_splits; ++i) { + EXPECT_THAT(m.GetOutput(i), ElementsAreArray(output_data[i])) << debug(i); + EXPECT_THAT(m.GetOutputShape(i), ElementsAreArray(output_shape)) + << debug(i); + } + + SplitOpModel const_m({TensorType_FLOAT32, input_shape}, num_splits, axis); + const_m.SetInput(input_data); + const_m.Invoke(); + for (int i = 0; i < num_splits; ++i) { + EXPECT_THAT(const_m.GetOutput(i), ElementsAreArray(output_data[i])) + << debug(i); + EXPECT_THAT(const_m.GetOutputShape(i), ElementsAreArray(output_shape)) + << debug(i); + } +} + +TEST(SplitOpTest, FourDimensional) { + Check(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 5, 6, 7, 8}, + {9, 10, 11, 12, 13, 14, 15, 16}, + }); + Check(/*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 9, 10, 11, 12}, + {5, 6, 7, 8, 13, 14, 15, 16}, + }); + Check(/*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 5, 6, 9, 10, 13, 14}, + {3, 4, 7, 8, 11, 12, 15, 16}, + }); + Check(/*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 3, 5, 7, 9, 11, 13, 15}, + {2, 4, 6, 8, 10, 12, 14, 16}, + }); +} + +TEST(SplitOpTest, OneDimensional) { + Check(/*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8}, + {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); +} + +TEST(SplitOpTest, NegativeAxis) { + Check(/*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 5, 6, 7, 8}, + {9, 10, 11, 12, 13, 14, 15, 16}, + }); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc index 6f56aa6bf3..373310bd87 100644 --- a/tensorflow/contrib/lite/kernels/test_util.cc +++ b/tensorflow/contrib/lite/kernels/test_util.cc @@ -187,6 +187,7 @@ void SingleOpModel::BuildInterpreter( for (const auto& shape : input_shapes) { int input_idx = interpreter_->inputs()[i++]; if (input_idx == kOptionalTensor) continue; + if (shape.empty()) continue; CHECK(interpreter_->ResizeInputTensor(input_idx, shape) == kTfLiteOk); } CHECK(interpreter_->AllocateTensors() == kTfLiteOk) diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 92922a1460..841e96f137 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -535,6 +535,14 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, builtin_data = reinterpret_cast(params); break; } + case BuiltinOperator_SPLIT: { + auto* params = MallocPOD(); + if (auto* schema_params = op->builtin_options_as_SplitOptions()) { + params->num_splits = schema_params->num_splits(); + } + builtin_data = reinterpret_cast(params); + break; + } case BuiltinOperator_SQUEEZE: { auto* params = MallocPOD(); if (auto* schema_params = op->builtin_options_as_SqueezeOptions()) { diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index a83349d95f..02e8499f61 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -340,6 +340,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_MEAN: case tflite::BuiltinOperator_DIV: case tflite::BuiltinOperator_SUB: + case tflite::BuiltinOperator_SPLIT: case tflite::BuiltinOperator_SQUEEZE: case tflite::BuiltinOperator_STRIDED_SLICE: case tflite::BuiltinOperator_EXP: diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 7ec19a0612..75970b4126 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -121,7 +121,8 @@ enum BuiltinOperator : byte { STRIDED_SLICE = 45, BIDIRECTIONAL_SEQUENCE_RNN = 46, EXP = 47, - TOPK_V2=48, + TOPK_V2 = 48, + SPLIT = 49, } // Options for the builtin operators. @@ -160,6 +161,7 @@ union BuiltinOptions { StridedSliceOptions, ExpOptions, TopKV2Options, + SplitOptions, } enum Padding : byte { SAME, VALID } @@ -350,6 +352,10 @@ table SqueezeOptions { squeeze_dims:[int]; } +table SplitOptions { + num_splits: int; +} + table StridedSliceOptions { begin_mask: int; end_mask: int; diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 16cda10c51..06989c7b61 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -130,6 +130,9 @@ struct MeanOptionsT; struct SqueezeOptions; struct SqueezeOptionsT; +struct SplitOptions; +struct SplitOptionsT; + struct StridedSliceOptions; struct StridedSliceOptionsT; @@ -236,11 +239,12 @@ enum BuiltinOperator { BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46, BuiltinOperator_EXP = 47, BuiltinOperator_TOPK_V2 = 48, + BuiltinOperator_SPLIT = 49, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_TOPK_V2 + BuiltinOperator_MAX = BuiltinOperator_SPLIT }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[46] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[47] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -287,7 +291,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[46] { BuiltinOperator_STRIDED_SLICE, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, BuiltinOperator_EXP, - BuiltinOperator_TOPK_V2 + BuiltinOperator_TOPK_V2, + BuiltinOperator_SPLIT }; return values; } @@ -343,6 +348,7 @@ inline const char **EnumNamesBuiltinOperator() { "BIDIRECTIONAL_SEQUENCE_RNN", "EXP", "TOPK_V2", + "SPLIT", nullptr }; return names; @@ -389,11 +395,12 @@ enum BuiltinOptions { BuiltinOptions_StridedSliceOptions = 32, BuiltinOptions_ExpOptions = 33, BuiltinOptions_TopKV2Options = 34, + BuiltinOptions_SplitOptions = 35, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_TopKV2Options + BuiltinOptions_MAX = BuiltinOptions_SplitOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[35] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[36] { static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -429,7 +436,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[35] { BuiltinOptions_SequenceRNNOptions, BuiltinOptions_StridedSliceOptions, BuiltinOptions_ExpOptions, - BuiltinOptions_TopKV2Options + BuiltinOptions_TopKV2Options, + BuiltinOptions_SplitOptions }; return values; } @@ -471,6 +479,7 @@ inline const char **EnumNamesBuiltinOptions() { "StridedSliceOptions", "ExpOptions", "TopKV2Options", + "SplitOptions", nullptr }; return names; @@ -621,6 +630,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -924,6 +937,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_TopKV2Options ? reinterpret_cast(value) : nullptr; } + SplitOptionsT *AsSplitOptions() { + return type == BuiltinOptions_SplitOptions ? + reinterpret_cast(value) : nullptr; + } + const SplitOptionsT *AsSplitOptions() const { + return type == BuiltinOptions_SplitOptions ? + reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -3391,6 +3412,60 @@ inline flatbuffers::Offset CreateSqueezeOptionsDirect( flatbuffers::Offset CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct SplitOptionsT : public flatbuffers::NativeTable { + typedef SplitOptions TableType; + int32_t num_splits; + SplitOptionsT() + : num_splits(0) { + } +}; + +struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SplitOptionsT NativeTableType; + enum { + VT_NUM_SPLITS = 4 + }; + int32_t num_splits() const { + return GetField(VT_NUM_SPLITS, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM_SPLITS) && + verifier.EndTable(); + } + SplitOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SplitOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SplitOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_splits(int32_t num_splits) { + fbb_.AddElement(SplitOptions::VT_NUM_SPLITS, num_splits, 0); + } + explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + SplitOptionsBuilder &operator=(const SplitOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSplitOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_splits = 0) { + SplitOptionsBuilder builder_(_fbb); + builder_.add_num_splits(num_splits); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct StridedSliceOptionsT : public flatbuffers::NativeTable { typedef StridedSliceOptions TableType; int32_t begin_mask; @@ -3712,6 +3787,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const TopKV2Options *builtin_options_as_TopKV2Options() const { return builtin_options_type() == BuiltinOptions_TopKV2Options ? static_cast(builtin_options()) : nullptr; } + const SplitOptions *builtin_options_as_SplitOptions() const { + return builtin_options_type() == BuiltinOptions_SplitOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -3874,6 +3952,10 @@ template<> inline const TopKV2Options *Operator::builtin_options_as inline const SplitOptions *Operator::builtin_options_as() const { + return builtin_options_as_SplitOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -5269,6 +5351,32 @@ inline flatbuffers::Offset CreateSqueezeOptions(flatbuffers::Fla _squeeze_dims); } +inline SplitOptionsT *SplitOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new SplitOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void SplitOptions::UnPackTo(SplitOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = num_splits(); _o->num_splits = _e; }; +} + +inline flatbuffers::Offset SplitOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSplitOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SplitOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _num_splits = _o->num_splits; + return tflite::CreateSplitOptions( + _fbb, + _num_splits); +} + inline StridedSliceOptionsT *StridedSliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new StridedSliceOptionsT(); UnPackTo(_o, _resolver); @@ -5623,6 +5731,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -5777,6 +5889,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -5919,6 +6035,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateTopKV2Options(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(value); + return CreateSplitOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -6061,6 +6181,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new TopKV2OptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_SplitOptions: { + value = new SplitOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -6238,6 +6362,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index d9e269f593..06570ae9aa 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -46,6 +46,7 @@ gen_zipped_test_files( "softmax.zip", "space_to_batch_nd.zip", "space_to_depth.zip", + "split.zip", "squeeze.zip", "strided_slice.zip", "sub.zip", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index f0b4fcbd52..1ced3bfd73 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -103,6 +103,8 @@ KNOWN_BUGS = { r"div.*int32": "72051395", # TOCO require matching dimensions in strided_slice. r"strided_slice.*begin=\[0\].*end=\[1\].*": "73170889", + # No support for SplitV + r"split.*num_or_size_splits=\[2,2\]": "73377559", } @@ -1030,8 +1032,31 @@ def make_depthwiseconv_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +def make_split_tests(zip_path): + """Make a set of tests to do tf.split.""" + + test_parameters = [{ + "input_shape": [[1, 3, 4, 6], [2, 4, 1], [6, 4], [8]], + "num_or_size_splits": [1, 2, 3, 4, 5, [2, 2]], + "axis": [0, 1, 2, 3, -4, -3, -2, -1], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + out = tf.split( + input_tensor, parameters["num_or_size_splits"], parameters["axis"]) + return [input_tensor], out + + def build_inputs(parameters, sess, inputs, outputs): + values = [create_tensor_data(np.float32, parameters["input_shape"])] + return values, sess.run(outputs, feed_dict=dict(zip(inputs, values))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + def make_concatenation_tests(zip_path): - """Make a set of tests to do concatenatinon.""" + """Make a set of tests to do concatenation.""" test_parameters = [{ "base_shape": [[1, 3, 4, 3], [3, 4]], @@ -1786,6 +1811,7 @@ def main(unused_args): "softmax.zip": make_softmax_tests, "space_to_depth.zip": make_space_to_depth_tests, "topk.zip": make_topk_tests, + "split.zip": make_split_tests, "transpose.zip": make_transpose_tests, "mean.zip": make_mean_tests, "squeeze.zip": make_squeeze_tests, diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 80e806ab03..49766cedac 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -262,6 +262,7 @@ INSTANTIATE_TESTS(sigmoid) INSTANTIATE_TESTS(softmax) INSTANTIATE_TESTS(space_to_depth) INSTANTIATE_TESTS(sub) +INSTANTIATE_TESTS(split) INSTANTIATE_TESTS(div) INSTANTIATE_TESTS(transpose) INSTANTIATE_TESTS(mean) diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index 5f2caa5bbb..aabc7c5109 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -601,15 +601,21 @@ class Squeeze } }; -class Split : public CustomOperator { +class Split + : public BuiltinOperator { public: - using CustomOperator::CustomOperator; - void WriteOptions(const TocoOperator& op, - flexbuffers::Builder* fbb) const override { - fbb->Int("num_split", op.num_split); + using BuiltinOperator::BuiltinOperator; + + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateSplitOptions(*builder, op.num_split); } - void ReadOptions(const flexbuffers::Map& m, TocoOperator* op) const override { - op->num_split = m["num_split"].AsInt64(); + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->num_split = options.num_splits(); } }; @@ -813,6 +819,8 @@ std::vector> BuildOperatorList() { OperatorType::kResizeBilinear)); ops.emplace_back( new Squeeze(::tflite::BuiltinOperator_SQUEEZE, OperatorType::kSqueeze)); + ops.emplace_back(new Split(::tflite::BuiltinOperator_SPLIT, + OperatorType::kTensorFlowSplit)); ops.emplace_back(new StridedSlice(::tflite::BuiltinOperator_STRIDED_SLICE, OperatorType::kStridedSlice)); ops.emplace_back( @@ -825,7 +833,6 @@ std::vector> BuildOperatorList() { ops.emplace_back( new DepthToSpace("DEPTH_TO_SPACE", OperatorType::kDepthToSpace)); ops.emplace_back(new FakeQuant("FAKE_QUANT", OperatorType::kFakeQuant)); - ops.emplace_back(new Split("SPLIT", OperatorType::kTensorFlowSplit)); ops.emplace_back(new TensorFlowUnsupported( "TENSORFLOW_UNSUPPORTED", OperatorType::kTensorFlowUnsupported)); -- GitLab From a4dc25936dc4079c2e4e4869aa71033da977c5fb Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 15 Feb 2018 15:09:19 -0800 Subject: [PATCH 241/629] Update tf.keras to Keras 2.1.4 API PiperOrigin-RevId: 185908711 --- .../keras/_impl/keras/layers/recurrent.py | 51 +++++++++--- .../_impl/keras/layers/recurrent_test.py | 31 ++++++- .../keras/_impl/keras/layers/wrappers.py | 82 +++++++++++++++++-- .../keras/_impl/keras/layers/wrappers_test.py | 47 +++++++++-- ...nsorflow.keras.layers.-bidirectional.pbtxt | 4 + ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt | 2 +- ...rflow.keras.layers.-time-distributed.pbtxt | 4 + .../tensorflow.keras.layers.-wrapper.pbtxt | 4 + 8 files changed, 198 insertions(+), 27 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index 4bf6ae975f..b34b92c763 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -87,7 +87,7 @@ class StackedRNNCells(Layer): state_size.append(cell.state_size) return tuple(state_size) - def call(self, inputs, states, **kwargs): + def call(self, inputs, states, constants=None, **kwargs): # Recover per-cell states. nested_states = [] for cell in self.cells[::-1]: @@ -102,7 +102,12 @@ class StackedRNNCells(Layer): # Call the cells in order and store the returned states. new_nested_states = [] for cell, states in zip(self.cells, nested_states): - inputs, states = cell.call(inputs, states, **kwargs) + if has_arg(cell.call, 'constants'): + inputs, states = cell.call(inputs, states, constants=constants, + **kwargs) + else: + inputs, states = cell.call(inputs, states, **kwargs) + new_nested_states.append(states) # Format the new states as a flat list @@ -114,9 +119,15 @@ class StackedRNNCells(Layer): @shape_type_conversion def build(self, input_shape): + if isinstance(input_shape, list): + constants_shape = input_shape[1:] + input_shape = input_shape[0] for cell in self.cells: if isinstance(cell, Layer): - cell.build(input_shape) + if has_arg(cell.call, 'constants'): + cell.build([input_shape] + constants_shape) + else: + cell.build(input_shape) if hasattr(cell.state_size, '__len__'): output_dim = cell.state_size[0] else: @@ -527,12 +538,14 @@ class RNN(Layer): self._num_constants = len(constants) additional_specs += self.constants_spec # at this point additional_inputs cannot be empty - is_keras_tensor = hasattr(additional_inputs[0], '_keras_history') + is_keras_tensor = K.is_keras_tensor(additional_inputs[0]) for tensor in additional_inputs: - if hasattr(tensor, '_keras_history') != is_keras_tensor: + if K.is_keras_tensor(tensor) != is_keras_tensor: raise ValueError('The initial state or constants of an RNN' ' layer cannot be specified with a mix of' - ' Keras tensors and non-Keras tensors') + ' Keras tensors and non-Keras tensors' + '(a "Keras tensor" is a tensor that was' + 'returned by a Keras layer, or by `Input`)') if is_keras_tensor: # Compute the full input spec, including state and constants @@ -796,7 +809,8 @@ class SimpleRNNCell(Layer): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. - If you pass None, no activation is applied + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, @@ -966,6 +980,7 @@ class SimpleRNN(RNN): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. @@ -1176,10 +1191,14 @@ class GRUCell(Layer): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. @@ -1427,10 +1446,14 @@ class GRU(RNN): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. - If you pass None, no activation is applied + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. @@ -1661,10 +1684,14 @@ class LSTMCell(Layer): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. - If you pass None, no activation is applied + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`).x use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. @@ -1943,10 +1970,14 @@ class LSTM(RNN): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. - If you pass None, no activation is applied + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs.. diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py index ab48a63e35..de022153f6 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py @@ -253,7 +253,7 @@ class RNNTest(test.TestCase): self.assertAllClose(y_np, y_np_2, atol=1e-4) with self.test_session(): - # test flat list inputs + # test flat list inputs. with keras.utils.CustomObjectScope(custom_objects): layer = keras.layers.RNN.from_config(config.copy()) y = layer([x, c]) @@ -262,6 +262,35 @@ class RNNTest(test.TestCase): y_np_3 = model.predict([x_np, c_np]) self.assertAllClose(y_np, y_np_3, atol=1e-4) + with self.test_session(): + # Test stacking. + cells = [keras.layers.recurrent.GRUCell(8), + RNNCellWithConstants(12), + RNNCellWithConstants(32)] + layer = keras.layers.recurrent.RNN(cells) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], + np.zeros((6, 32)) + ) + + with self.test_session(): + # Test stacked RNN serialization + x_np = np.random.random((6, 5, 5)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.recurrent.RNN.from_config(config.copy()) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + def test_rnn_cell_with_constants_layer_passing_initial_state(self): class RNNCellWithConstants(keras.layers.Layer): diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers.py b/tensorflow/python/keras/_impl/keras/layers/wrappers.py index f053aa1d09..61f1a758e4 100644 --- a/tensorflow/python/keras/_impl/keras/layers/wrappers.py +++ b/tensorflow/python/keras/_impl/keras/layers/wrappers.py @@ -61,6 +61,14 @@ class Wrapper(Layer): else: return None + @property + def trainable(self): + return self.layer.trainable + + @trainable.setter + def trainable(self, value): + self.layer.trainable = value + @property def trainable_weights(self): return self.layer.trainable_weights @@ -255,7 +263,6 @@ class Bidirectional(Wrapper): """ def __init__(self, layer, merge_mode='concat', weights=None, **kwargs): - super(Bidirectional, self).__init__(layer, **kwargs) if merge_mode not in ['sum', 'mul', 'ave', 'concat', None]: raise ValueError('Invalid merge mode. ' 'Merge mode should be one of ' @@ -275,6 +282,19 @@ class Bidirectional(Wrapper): self.return_sequences = layer.return_sequences self.return_state = layer.return_state self.supports_masking = True + self._trainable = True + super(Bidirectional, self).__init__(layer, **kwargs) + self.input_spec = layer.input_spec + + @property + def trainable(self): + return self._trainable + + @trainable.setter + def trainable(self, value): + self._trainable = value + self.forward_layer.trainable = value + self.backward_layer.trainable = value def get_weights(self): return self.forward_layer.get_weights() + self.backward_layer.get_weights() @@ -305,6 +325,61 @@ class Bidirectional(Wrapper): return [output_shape] + state_shape + copy.copy(state_shape) return output_shape + def __call__(self, inputs, initial_state=None, **kwargs): + if isinstance(inputs, list): + if len(inputs) > 1: + initial_state = inputs[1:] + inputs = inputs[0] + + if initial_state is None: + return super(Bidirectional, self).__call__(inputs, **kwargs) + + # Standardize `initial_state` into list + if isinstance(initial_state, tuple): + initial_state = list(initial_state) + elif not isinstance(initial_state, list): + initial_state = [initial_state] + + # Check if `initial_state` can be splitted into half + num_states = len(initial_state) + if num_states % 2 > 0: + raise ValueError( + 'When passing `initial_state` to a Bidirectional RNN, the state ' + 'should be a list containing the states of the underlying RNNs. ' + 'Found: ' + str(initial_state)) + + # Applies the same workaround as in `RNN.__call__`, without handling + # constants + kwargs['initial_state'] = initial_state + additional_inputs = initial_state + additional_specs = [InputSpec(shape=K.int_shape(state)) + for state in initial_state] + self.forward_layer.state_spec = additional_specs[:num_states // 2] + self.backward_layer.state_spec = additional_specs[num_states // 2:] + + is_keras_tensor = K.is_keras_tensor(additional_inputs[0]) + for tensor in additional_inputs: + if K.is_keras_tensor(tensor) != is_keras_tensor: + raise ValueError('The initial state of a Bidirectional' + ' layer cannot be specified with a mix of' + ' Keras tensors and non-Keras tensors' + ' (a "Keras tensor" is a tensor that was' + ' returned by a Keras layer, or by `Input`)') + + if is_keras_tensor: + # Compute the full input spec, including state + full_input = [inputs] + additional_inputs + full_input_spec = self.input_spec + additional_specs + + # Perform the call with temporarily replaced input_spec + original_input_spec = self.input_spec + self.input_spec = full_input_spec + output = super(Bidirectional, self).__call__(full_input, **kwargs) + self.input_spec = original_input_spec + return output + else: + return super(Bidirectional, self).__call__(inputs, **kwargs) + def call(self, inputs, training=None, mask=None, initial_state=None): kwargs = {} if has_arg(self.layer.call, 'training'): @@ -313,11 +388,6 @@ class Bidirectional(Wrapper): kwargs['mask'] = mask if initial_state is not None and has_arg(self.layer.call, 'initial_state'): - if not isinstance(initial_state, list): - raise ValueError( - 'When passing `initial_state` to a Bidirectional RNN, the state ' - 'should be a list containing the states of the underlying RNNs. ' - 'Found: ' + str(initial_state)) forward_state = initial_state[:len(initial_state) // 2] backward_state = initial_state[len(initial_state) // 2:] y = self.forward_layer.call(inputs, initial_state=forward_state, **kwargs) diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py b/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py index f48c8919a1..c81d6b883c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py @@ -133,6 +133,20 @@ class TimeDistributedTest(test.TestCase): # Verify input_map has one mapping from inputs to reshaped inputs. self.assertEqual(len(td._input_map.keys()), 1) + def test_TimeDistributed_trainable(self): + # test layers that need learning_phase to be set + x = keras.layers.Input(shape=(3, 2)) + layer = keras.layers.TimeDistributed(keras.layers.BatchNormalization()) + _ = layer(x) + assert len(layer.updates) == 2 + assert len(layer.trainable_weights) == 2 + layer.trainable = False + assert not layer.updates + assert not layer.trainable_weights + layer.trainable = True + assert len(layer.updates) == 2 + assert len(layer.trainable_weights) == 2 + class BidirectionalTest(test.TestCase): @@ -338,23 +352,38 @@ class BidirectionalTest(test.TestCase): units = 3 with self.test_session(): - inputs = keras.Input((timesteps, dim)) + input1 = keras.layers.Input((timesteps, dim)) layer = keras.layers.Bidirectional( rnn(units, return_state=True, return_sequences=True)) - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] + state = layer(input1)[1:] # test passing invalid initial_state: passing a tensor + input2 = keras.layers.Input((timesteps, dim)) with self.assertRaises(ValueError): output = keras.layers.Bidirectional( - rnn(units))(output, initial_state=state[0]) + rnn(units))(input2, initial_state=state[0]) # test valid usage: passing a list - output = keras.layers.Bidirectional( - rnn(units))(output, initial_state=state) - model = keras.Model(inputs, output) - inputs = np.random.rand(samples, timesteps, dim) - outputs = model.predict(inputs) + output = keras.layers.Bidirectional(rnn(units))(input2, + initial_state=state) + model = keras.models.Model([input1, input2], output) + assert len(model.layers) == 4 + assert isinstance(model.layers[-1].input, list) + inputs = [np.random.rand(samples, timesteps, dim), + np.random.rand(samples, timesteps, dim)] + model.predict(inputs) + + def test_Bidirectional_trainable(self): + # test layers that need learning_phase to be set + with self.test_session(): + x = keras.layers.Input(shape=(3, 2)) + layer = keras.layers.Bidirectional(keras.layers.SimpleRNN(3)) + _ = layer(x) + assert len(layer.trainable_weights) == 6 + layer.trainable = False + assert not layer.trainable_weights + layer.trainable = True + assert len(layer.trainable_weights) == 6 def _to_list(ls): diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt index db26c3e568..699208a0b9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt @@ -73,6 +73,10 @@ tf_class { name: "scope_name" mtype: "" } + member { + name: "trainable" + mtype: "" + } member { name: "trainable_variables" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt index 90c37bd986..3dde1e5769 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -122,7 +122,7 @@ tf_class { } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'states\', \'constants\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " } member_method { name: "compute_mask" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt index 40aa782a02..1e176d8d4b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt @@ -69,6 +69,10 @@ tf_class { name: "scope_name" mtype: "" } + member { + name: "trainable" + mtype: "" + } member { name: "trainable_variables" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt index 27a54382a4..ea3bb2f8f5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt @@ -68,6 +68,10 @@ tf_class { name: "scope_name" mtype: "" } + member { + name: "trainable" + mtype: "" + } member { name: "trainable_variables" mtype: "" -- GitLab From 1a1617e946db2b7c1acd1eafa9a47561eb68dfb5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 15:32:17 -0800 Subject: [PATCH 242/629] Add /learning/tfx/ to the visibility group of tensorflow/compiler/tf2xla/python. PiperOrigin-RevId: 185912486 --- tensorflow/compiler/tf2xla/python/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2xla/python/BUILD b/tensorflow/compiler/tf2xla/python/BUILD index 49bde78039..f0a2ef0651 100644 --- a/tensorflow/compiler/tf2xla/python/BUILD +++ b/tensorflow/compiler/tf2xla/python/BUILD @@ -1,7 +1,10 @@ licenses(["notice"]) # Apache 2.0 package( - default_visibility = ["//tensorflow:internal"], + default_visibility = [ + "//learning/tfx:__subpackages__", + "//tensorflow:internal", + ], ) load( -- GitLab From f0a968651119a7dd17e727664c4741eaf737e839 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Thu, 15 Feb 2018 15:42:02 -0800 Subject: [PATCH 243/629] Linter fixes --- .../examples/image_retraining/retrain.py | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index fb21ccbbb7..8d2e4a07f0 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -41,7 +41,6 @@ The subfolder names are important, since they define what label is applied to each image, but the filenames themselves don't matter. Once your images are prepared, you can run the training with a command like this: - ```bash bazel build tensorflow/examples/image_retraining:retrain && \ bazel-bin/tensorflow/examples/image_retraining/retrain \ @@ -70,12 +69,14 @@ on resource-limited platforms, you can try the `--architecture` flag with a Mobilenet model. For example: Run floating-point version of mobilenet: + ```bash python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos --architecture mobilenet_1.0_224 ``` Run quantized version of mobilenet: + ```bash python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quantized @@ -98,8 +99,10 @@ tensorboard --logdir /tmp/retrain_logs To use with Tensorflow Serving: -tensorflow_model_server --port=9000 --model_name=inception --model_base_path=/tmp/saved_models/ - +```bash +tensorflow_model_server --port=9000 --model_name=inception \ + --model_base_path=/tmp/saved_models/ +``` """ from __future__ import absolute_import from __future__ import division @@ -1026,24 +1029,25 @@ def export_model(sess, architecture, saved_model_dir): inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} out_classes = sess.graph.get_tensor_by_name('final_result:0') - outputs = {'prediction': tf.saved_model.utils.build_tensor_info(out_classes)} + outputs = {'prediction': + tf.saved_model.utils.build_tensor_info(out_classes)} signature = tf.saved_model.signature_def_utils.build_signature_def( - inputs=inputs, - outputs=outputs, - method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME - ) + inputs=inputs, + outputs=outputs, + method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') # Save out the SavedModel. builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature - }, - legacy_init_op=legacy_init_op) + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map = { + tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature + }, + legacy_init_op=legacy_init_op) builder.save() -- GitLab From c593796a4f87f308b157ed41207eee9ff7d62de7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 15:54:39 -0800 Subject: [PATCH 244/629] Don't spam the logs. PiperOrigin-RevId: 185916071 --- tensorflow/core/grappler/optimizers/constant_folding.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 8f89f2ae64..b8a21ea5a1 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1712,7 +1712,6 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { - LOG(INFO) << "Graph before: " << item.graph.DebugString(); nodes_to_preserve_ = item.NodesToPreserve(); for (const auto& feed : item.feed) { feed_nodes_.insert(NodeName(feed.first)); @@ -1739,7 +1738,6 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, *output->mutable_library() = item.graph.library(); *output->mutable_versions() = item.graph.versions(); - LOG(INFO) << "Graph after: " << output->DebugString(); return Status::OK(); } -- GitLab From c859e8a7b7611a30730f176fc9cddcb2dd59adfb Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 15 Feb 2018 15:55:32 -0800 Subject: [PATCH 245/629] Fix a typo in model.cc error message. PiperOrigin-RevId: 185916196 --- tensorflow/contrib/lite/model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 841e96f137..d6522fc077 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -136,7 +136,7 @@ TfLiteStatus InterpreterBuilder::BuildLocalIndexToRegistrationMapping() { } } else if (!opcode->custom_code()) { error_reporter_->Report( - "Operator with builtin_code==0 has no custom_code.\n"); + "Operator with CUSTOM builtin_code has no custom_code.\n"); status = kTfLiteError; } else { const char* name = opcode->custom_code()->c_str(); -- GitLab From e6f69c1161f24e80e71caeab6c721a98b208d5d7 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 15 Feb 2018 15:56:10 -0800 Subject: [PATCH 246/629] Update eager's MNIST example to inherit from `tf.keras.Model`. Also make estimator utils compatible with `tf_decorator`-wrapped functions. PiperOrigin-RevId: 185916290 --- .../eager/python/examples/mnist/mnist.py | 25 +++++++++---------- tensorflow/python/estimator/util.py | 2 ++ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/mnist/mnist.py b/tensorflow/contrib/eager/python/examples/mnist/mnist.py index ed7dbc8904..241eb23ce9 100644 --- a/tensorflow/contrib/eager/python/examples/mnist/mnist.py +++ b/tensorflow/contrib/eager/python/examples/mnist/mnist.py @@ -35,7 +35,7 @@ from tensorflow.examples.tutorials.mnist import input_data FLAGS = None -class MNISTModel(tfe.Network): +class MNISTModel(tf.keras.Model): """MNIST Network. Network structure is equivalent to: @@ -61,18 +61,17 @@ class MNISTModel(tfe.Network): else: assert data_format == 'channels_last' self._input_shape = [-1, 28, 28, 1] - self.conv1 = self.track_layer( - tf.layers.Conv2D(32, 5, data_format=data_format, activation=tf.nn.relu)) - self.conv2 = self.track_layer( - tf.layers.Conv2D(64, 5, data_format=data_format, activation=tf.nn.relu)) - self.fc1 = self.track_layer(tf.layers.Dense(1024, activation=tf.nn.relu)) - self.fc2 = self.track_layer(tf.layers.Dense(10)) - self.dropout = self.track_layer(tf.layers.Dropout(0.5)) - self.max_pool2d = self.track_layer( - tf.layers.MaxPooling2D( - (2, 2), (2, 2), padding='SAME', data_format=data_format)) - - def call(self, inputs, training): + self.conv1 = tf.layers.Conv2D( + 32, 5, data_format=data_format, activation=tf.nn.relu) + self.conv2 = tf.layers.Conv2D( + 64, 5, data_format=data_format, activation=tf.nn.relu) + self.fc1 = tf.layers.Dense(1024, activation=tf.nn.relu) + self.fc2 = tf.layers.Dense(10) + self.dropout = tf.layers.Dropout(0.5) + self.max_pool2d = tf.layers.MaxPooling2D( + (2, 2), (2, 2), padding='SAME', data_format=data_format) + + def call(self, inputs, training=False): """Computes labels from inputs. Users should invoke __call__ to run the network, which delegates to this diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py index b7ba76d871..3ce8eea84b 100644 --- a/tensorflow/python/estimator/util.py +++ b/tensorflow/python/estimator/util.py @@ -21,10 +21,12 @@ from __future__ import print_function import functools +from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect def _is_bounded_method(fn): + _, fn = tf_decorator.unwrap(fn) return tf_inspect.ismethod(fn) and (fn.__self__ is not None) -- GitLab From f5b30312013df5b7bd3a50555b2facadd4aed204 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 16:23:43 -0800 Subject: [PATCH 247/629] K-FAC: Support for embedding layers, add FisherFactor.{multiply, multiply_inverse}. PiperOrigin-RevId: 185920837 --- .../python/kernel_tests/fisher_blocks_test.py | 64 ++- .../kernel_tests/fisher_factors_test.py | 42 +- .../contrib/kfac/python/ops/fisher_blocks.py | 144 +++++-- .../kfac/python/ops/fisher_blocks_lib.py | 5 +- .../contrib/kfac/python/ops/fisher_factors.py | 387 +++++++++++++++--- .../kfac/python/ops/fisher_factors_lib.py | 29 +- .../kfac/python/ops/layer_collection.py | 52 +++ tensorflow/contrib/kfac/python/ops/utils.py | 61 ++- .../contrib/kfac/python/ops/utils_lib.py | 2 + 9 files changed, 662 insertions(+), 124 deletions(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py index 82accd57f0..fb4b3a241c 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py @@ -26,6 +26,7 @@ from tensorflow.contrib.kfac.python.ops import utils from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops +from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import state_ops @@ -236,10 +237,10 @@ class NaiveDiagonalFBTest(test.TestCase): self.assertAllClose(output_flat, explicit) -class FullyConnectedDiagonalFB(test.TestCase): +class FullyConnectedDiagonalFBTest(test.TestCase): def setUp(self): - super(FullyConnectedDiagonalFB, self).setUp() + super(FullyConnectedDiagonalFBTest, self).setUp() self.batch_size = 4 self.input_size = 6 @@ -375,6 +376,65 @@ class FullyConnectedDiagonalFB(test.TestCase): return multiply_result, multiply_inverse_result +class EmbeddingKFACFBTest(test.TestCase): + + def testInstantiateFactors(self): + with ops.Graph().as_default(): + random_seed.set_random_seed(200) + + # Create a Fisher Block. + vocab_size = 5 + block = fb.EmbeddingKFACFB(lc.LayerCollection(), vocab_size) + + # Add some examples. + inputs = array_ops.constant([[0, 1], [1, 2], [2, 3]]) + outputs = array_ops.constant([[0.], [1.], [2.]]) + block.register_additional_minibatch(inputs, outputs) + + # Instantiate factor's variables. Ensure it doesn't fail. + grads = outputs**2. + damping = array_ops.constant(0.) + block.instantiate_factors(([grads],), damping) + + def testMultiplyInverse(self): + with ops.Graph().as_default(), self.test_session() as sess: + random_seed.set_random_seed(200) + + # Create a Fisher Block. + vocab_size = 5 + block = fb.EmbeddingKFACFB(lc.LayerCollection(), vocab_size) + + # Add some examples. + inputs = array_ops.constant([[0, 1], [1, 2], [2, 3]]) + outputs = array_ops.constant([[0.], [1.], [2.]]) + block.register_additional_minibatch(inputs, outputs) + + # Instantiate factor's variables. Ensure it doesn't fail. + grads = outputs**2. + damping = array_ops.constant(0.) + block.instantiate_factors(([grads],), damping) + + # Create a sparse update. + indices = array_ops.constant([1, 3, 4]) + values = array_ops.constant([[1.], [1.], [1.]]) + sparse_vector = ops.IndexedSlices( + values, indices, dense_shape=[vocab_size, 1]) + dense_vector = array_ops.reshape([0., 1., 0., 1., 1.], [vocab_size, 1]) + + # Compare Fisher-vector product against explicit result. + result = block.multiply_inverse(sparse_vector) + expected_result = linalg_ops.matrix_solve(block.full_fisher_block(), + dense_vector) + + sess.run(tf_variables.global_variables_initializer()) + self.assertAlmostEqual( + sess.run(expected_result[1]), sess.run(result.values[0])) + self.assertAlmostEqual( + sess.run(expected_result[3]), sess.run(result.values[1])) + self.assertAlmostEqual( + sess.run(expected_result[4]), sess.run(result.values[2])) + + class FullyConnectedKFACBasicFBTest(test.TestCase): def testFullyConnectedKFACBasicFBInit(self): diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py index 753378d9f4..66e18974ab 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py @@ -89,6 +89,21 @@ class FisherFactorTestingDummy(ff.FisherFactor): def make_inverse_update_ops(self): return [] + def get_cov(self): + return NotImplementedError + + def left_multiply(self, x, damping): + return NotImplementedError + + def right_multiply(self, x, damping): + return NotImplementedError + + def left_multiply_inverse(self, x, damping): + return NotImplementedError + + def right_multiply_inverse(self, x, damping): + return NotImplementedError + class InverseProvidingFactorTestingDummy(ff.InverseProvidingFactor): """Dummy class to test the non-abstract methods on ff.InverseProvidingFactor. @@ -379,7 +394,7 @@ class NaiveDiagonalFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), name='a/b/c') factor = ff.NaiveDiagonalFactor((tensor,), 32) - self.assertEqual([6, 1], factor.get_cov().get_shape().as_list()) + self.assertEqual([6, 1], factor.get_cov_var().get_shape().as_list()) def testNaiveDiagonalFactorInitFloat64(self): with tf_ops.Graph().as_default(): @@ -387,7 +402,7 @@ class NaiveDiagonalFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') factor = ff.NaiveDiagonalFactor((tensor,), 32) - cov = factor.get_cov() + cov = factor.get_cov_var() self.assertEqual(cov.dtype, dtype) self.assertEqual([6, 1], cov.get_shape().as_list()) @@ -402,6 +417,29 @@ class NaiveDiagonalFactorTest(test.TestCase): self.assertAllClose([[0.75], [1.5]], new_cov) +class EmbeddingInputKroneckerFactorTest(test.TestCase): + + def testInitialization(self): + with tf_ops.Graph().as_default(): + input_ids = array_ops.constant([[0], [1], [4]]) + vocab_size = 5 + factor = ff.EmbeddingInputKroneckerFactor((input_ids,), vocab_size) + cov = factor.get_cov_var() + self.assertEqual(cov.shape.as_list(), [vocab_size]) + + def testCovarianceUpdateOp(self): + with tf_ops.Graph().as_default(): + input_ids = array_ops.constant([[0], [1], [4]]) + vocab_size = 5 + factor = ff.EmbeddingInputKroneckerFactor((input_ids,), vocab_size) + cov_update_op = factor.make_covariance_update_op(0.0) + + with self.test_session() as sess: + sess.run(tf_variables.global_variables_initializer()) + new_cov = sess.run(cov_update_op) + self.assertAllClose(np.array([1., 1., 0., 0., 1.]) / 3., new_cov) + + class FullyConnectedKroneckerFactorTest(test.TestCase): def _testFullyConnectedKroneckerFactorInit(self, diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index 0d2fa706f5..cf38d28b43 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -92,10 +92,22 @@ def compute_pi_tracenorm(left_cov, right_cov): Returns: The computed scalar constant pi for these Kronecker Factors (as a Tensor). """ + + def _trace(cov): + if len(cov.shape) == 1: + # Diagonal matrix. + return math_ops.reduce_sum(cov) + elif len(cov.shape) == 2: + # Full matrix. + return math_ops.trace(cov) + else: + raise ValueError( + "What's the trace of a Tensor of rank %d?" % len(cov.shape)) + # Instead of dividing by the dim of the norm, we multiply by the dim of the # other norm. This works out the same in the ratio. - left_norm = math_ops.trace(left_cov) * right_cov.shape.as_list()[0] - right_norm = math_ops.trace(right_cov) * left_cov.shape.as_list()[0] + left_norm = _trace(left_cov) * right_cov.shape.as_list()[0] + right_norm = _trace(right_cov) * left_cov.shape.as_list()[0] return math_ops.sqrt(left_norm / right_norm) @@ -201,15 +213,15 @@ class FullFB(FisherBlock): self._factor.register_damped_inverse(damping) def multiply_inverse(self, vector): - inverse = self._factor.get_damped_inverse(self._damping) - out_flat = math_ops.matmul(inverse, utils.tensors_to_column(vector)) + vector_flat = utils.tensors_to_column(vector) + out_flat = self._factor.left_multiply_inverse( + vector_flat, self._damping) return utils.column_to_tensors(vector, out_flat) def multiply(self, vector): vector_flat = utils.tensors_to_column(vector) - out_flat = ( - math_ops.matmul(self._factor.get_cov(), vector_flat) + - self._damping * vector_flat) + out_flat = self._factor.left_multiply( + vector_flat, self._damping) return utils.column_to_tensors(vector, out_flat) def full_fisher_block(self): @@ -265,16 +277,20 @@ class NaiveDiagonalFB(FisherBlock): def multiply_inverse(self, vector): vector_flat = utils.tensors_to_column(vector) - out_flat = vector_flat / (self._factor.get_cov() + self._damping) + print("vector_flat: %s" % vector_flat) + out_flat = self._factor.left_multiply_inverse( + vector_flat, self._damping) + print("out_flat: %s" % out_flat) return utils.column_to_tensors(vector, out_flat) def multiply(self, vector): vector_flat = utils.tensors_to_column(vector) - out_flat = vector_flat * (self._factor.get_cov() + self._damping) + out_flat = self._factor.left_multiply( + vector_flat, self._damping) return utils.column_to_tensors(vector, out_flat) def full_fisher_block(self): - return array_ops.diag(array_ops.reshape(self._factor.get_cov(), (-1,))) + return self._factor.get_cov() def tensors_to_compute_grads(self): return self._params @@ -356,8 +372,9 @@ class FullyConnectedDiagonalFB(FisherBlock): Tensor of the same shape, corresponding to the inverse Fisher-vector product. """ - reshaped_vect = utils.layer_params_to_mat2d(vector) - reshaped_out = reshaped_vect / (self._factor.get_cov() + self._damping) + reshaped_vec = utils.layer_params_to_mat2d(vector) + reshaped_out = self._factor.left_multiply_inverse( + reshaped_vec, self._damping) return utils.mat2d_to_layer_params(vector, reshaped_out) def multiply(self, vector): @@ -372,8 +389,9 @@ class FullyConnectedDiagonalFB(FisherBlock): Returns: Tensor of the same shape, corresponding to the Fisher-vector product. """ - reshaped_vect = utils.layer_params_to_mat2d(vector) - reshaped_out = reshaped_vect * (self._factor.get_cov() + self._damping) + reshaped_vec = utils.layer_params_to_mat2d(vector) + reshaped_out = self._factor.left_multiply( + reshaped_vec, self._damping) return utils.mat2d_to_layer_params(vector, reshaped_out) def tensors_to_compute_grads(self): @@ -468,12 +486,14 @@ class ConvDiagonalFB(FisherBlock): def multiply_inverse(self, vector): reshaped_vect = utils.layer_params_to_mat2d(vector) - reshaped_out = reshaped_vect / (self._factor.get_cov() + self._damping) + reshaped_out = self._factor.left_multiply_inverse( + reshaped_vect, self._damping) return utils.mat2d_to_layer_params(vector, reshaped_out) def multiply(self, vector): reshaped_vect = utils.layer_params_to_mat2d(vector) - reshaped_out = reshaped_vect * (self._factor.get_cov() + self._damping) + reshaped_out = self._factor.left_multiply( + reshaped_vect, self._damping) return utils.mat2d_to_layer_params(vector, reshaped_out) def tensors_to_compute_grads(self): @@ -533,28 +553,24 @@ class KroneckerProductFB(FisherBlock): return 1.0 def multiply_inverse(self, vector): - left_factor_inv = self._input_factor.get_damped_inverse(self._input_damping) - right_factor_inv = self._output_factor.get_damped_inverse( - self._output_damping) reshaped_vector = utils.layer_params_to_mat2d(vector) - reshaped_out = math_ops.matmul(left_factor_inv, - math_ops.matmul(reshaped_vector, - right_factor_inv)) + reshaped_out = self._output_factor.right_multiply_inverse( + reshaped_vector, + self._output_damping) + reshaped_out = self._input_factor.left_multiply_inverse( + reshaped_out, self._input_damping) if self._renorm_coeff != 1.0: reshaped_out /= math_ops.cast( self._renorm_coeff, dtype=reshaped_out.dtype) return utils.mat2d_to_layer_params(vector, reshaped_out) def multiply(self, vector): - left_factor = self._input_factor.get_cov() - right_factor = self._output_factor.get_cov() reshaped_vector = utils.layer_params_to_mat2d(vector) - reshaped_out = ( - math_ops.matmul(reshaped_vector, right_factor) + - self._output_damping * reshaped_vector) - reshaped_out = ( - math_ops.matmul(left_factor, reshaped_out) + - self._input_damping * reshaped_out) + reshaped_out = self._output_factor.right_multiply( + reshaped_vector, + self._output_damping) + reshaped_out = self._input_factor.left_multiply( + reshaped_out, self._input_damping) if self._renorm_coeff != 1.0: reshaped_out *= math_ops.cast( self._renorm_coeff, dtype=reshaped_out.dtype) @@ -574,6 +590,74 @@ class KroneckerProductFB(FisherBlock): right_factor) +class EmbeddingKFACFB(KroneckerProductFB): + """K-FAC FisherBlock for embedding layers. + + This FisherBlock is similar to EmbeddingKFACFB, except that its + input factor is approximated by a diagonal matrix. In the case that each + example references exactly one embedding, this approximation is exact. + + Does not support bias parameters. + """ + + def __init__(self, layer_collection, vocab_size): + """Creates a EmbeddingKFACFB block. + + Args: + layer_collection: The collection of all layers in the K-FAC approximate + Fisher information matrix to which this FisherBlock belongs. + vocab_size: int. Size of vocabulary for this embedding layer. + """ + self._inputs = [] + self._outputs = [] + self._vocab_size = vocab_size + + super(EmbeddingKFACFB, self).__init__(layer_collection) + + def instantiate_factors(self, grads_list, damping): + """Instantiate Kronecker Factors for this FisherBlock. + + Args: + grads_list: List of list of Tensors. grads_list[i][j] is the + gradient of the loss with respect to 'outputs' from source 'i' and + tower 'j'. Each Tensor has shape [tower_minibatch_size, output_size]. + damping: 0-D Tensor or float. 'damping' * identity is approximately added + to this FisherBlock's Fisher approximation. + """ + # TODO(b/68033310): Validate which of, + # (1) summing on a single device (as below), or + # (2) on each device in isolation and aggregating + # is faster. + inputs = _concat_along_batch_dim(self._inputs) + grads_list = tuple(_concat_along_batch_dim(grads) for grads in grads_list) + + self._input_factor = self._layer_collection.make_or_get_factor( # + fisher_factors.EmbeddingInputKroneckerFactor, # + ((inputs,), self._vocab_size)) + self._output_factor = self._layer_collection.make_or_get_factor( # + fisher_factors.FullyConnectedKroneckerFactor, # + (grads_list,)) + self._register_damped_input_and_output_inverses(damping) + + def tensors_to_compute_grads(self): + return self._outputs + + def register_additional_minibatch(self, inputs, outputs): + """Registers an additional minibatch to the FisherBlock. + + Args: + inputs: Tensor of shape [batch_size, input_size]. Inputs to the + matrix-multiply. + outputs: Tensor of shape [batch_size, output_size]. Layer preactivations. + """ + self._inputs.append(inputs) + self._outputs.append(outputs) + + @property + def num_registered_minibatches(self): + return len(self._inputs) + + class FullyConnectedKFACBasicFB(KroneckerProductFB): """K-FAC FisherBlock for fully-connected (dense) layers. diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py index ac39630920..c04cf727fa 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py @@ -29,6 +29,7 @@ _allowed_symbols = [ 'NaiveDiagonalFB', 'FullyConnectedDiagonalFB', 'KroneckerProductFB', + 'EmbeddingKFACFB', 'FullyConnectedKFACBasicFB', 'ConvKFCBasicFB', 'ConvDiagonalFB', @@ -36,7 +37,9 @@ _allowed_symbols = [ 'compute_pi_tracenorm', 'compute_pi_adjusted_damping', 'num_conv_locations', - 'normalize_damping' + 'normalize_damping', + 'LEFT_MULTIPLY', + 'RIGHT_MULTIPLY', ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py index bcba18ae14..603d8b8b21 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py @@ -25,13 +25,13 @@ import numpy as np import six from tensorflow.contrib.kfac.python.ops import utils +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops as tf_ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn from tensorflow.python.ops import special_math_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables @@ -112,54 +112,6 @@ def diagonal_covariance_initializer(shape, dtype, partition_info): # pylint: di return array_ops.ones(shape, dtype) -def extract_image_patches(image, ksizes, strides, padding, name=None): - """Extracts image patches for an N-dimensional convolution. - - This function is a compatibility wrapper over tf.extract_image_patches(), as - ExtractImagePatches isn't yet implemented in XLA. - - Args: - image: Tensor of shape [batch, in_x, in_y, ..., in_channels]. Input images. - All dimensions except 'batch' must be defined. - ksizes: [filter_x, filter_y, ...]. Spatial shape of filter in each - dimension. - strides: [stride_x, stride_y, ...]. Spatial stride for filter in each - dimension. - padding: str. "VALID" or "SAME". - name: str or None. name of Op. - - Returns: - result: [batch, out_x, out_y, ..., filter_x, filter_y, ..., in_channels]. - Contains image patches to which conv kernel would be applied for each - output location. [out_x, out_y, ...] depends on padding. - """ - if not utils.on_tpu(): - return array_ops.extract_image_patches( - image, - ksizes=([1] + list(ksizes) + [1]), - strides=([1] + list(strides) + [1]), - rates=[1, 1, 1, 1], - padding=padding, - name=name) - - with tf_ops.name_scope(name, "extract_image_patches", - [image, ksizes, strides, padding]): - batch = image.shape.as_list()[0] - in_channels = image.shape.as_list()[-1] - - # Map each input feature to a location in the output. - out_channels = np.prod(ksizes) * in_channels - filters = linalg_ops.eye(out_channels), - filters = array_ops.reshape(filters, ksizes + [in_channels, out_channels]) - - result = nn.convolution(image, filters, padding, strides=strides) - out_spatial = result.shape.as_list()[1:-1] - result = array_ops.reshape( - result, [batch or -1] + out_spatial + ksizes + [in_channels]) - - return result - - def compute_cov(tensor, tensor_right=None, normalizer=None): """Compute the empirical second moment of the rows of a 2D Tensor. @@ -259,12 +211,21 @@ def scalar_or_tensor_to_string(val): class FisherFactor(object): """Base class for objects modeling factors of approximate Fisher blocks. - Note that for blocks that aren't based on approximations, a 'factor' can - be the entire block itself, as is the case for the diagonal and full - representations. + A FisherFactor represents part of an approximate Fisher Information matrix. + For example, one approximation to the Fisher uses the Kronecker product of two + FisherFactors A and B, F = kron(A, B). FisherFactors are composed with + FisherBlocks to construct a block-diagonal approximation to the full Fisher. + + FisherFactors are backed by a single, non-trainable variable that is updated + by running FisherFactor.make_covariance_update_op(). The shape and type of + this variable is implementation specific. - Subclasses must implement the _compute_new_cov method, and the _var_scope - and _cov_shape properties. + Note that for blocks that aren't based on approximations, a 'factor' can + be the entire block itself, as is the case for the diagonal and full + representations. + + Subclasses must implement the _compute_new_cov() method, and the _var_scope + and _cov_shape properties. """ def __init__(self): @@ -272,16 +233,21 @@ class FisherFactor(object): @abc.abstractproperty def _var_scope(self): + """Variable scope for this FisherFactor instance. + + Returns: + string that unique identifies this FisherFactor instance. + """ pass @abc.abstractproperty def _cov_shape(self): - """The shape of the cov matrix.""" + """The shape of the variable backing this FisherFactor.""" pass @abc.abstractproperty def _num_sources(self): - """The number of things to sum over when computing cov. + """The number of things to sum over when updating covariance variable. The default make_covariance_update_op function will call _compute_new_cov with indices ranging from 0 to _num_sources-1. The typical situation is @@ -293,10 +259,12 @@ class FisherFactor(object): @abc.abstractproperty def _dtype(self): + """dtype for variable backing this factor.""" pass @property def _cov_initializer(self): + """Function for initializing covariance variable.""" return covariance_initializer def instantiate_covariance(self): @@ -311,6 +279,15 @@ class FisherFactor(object): @abc.abstractmethod def _compute_new_cov(self, idx=0): + """Computes minibatch-estimated covariance for a single source. + + Args: + idx: int in [0, self._num_sources). Which source to use when estimating + covariance. + + Returns: + Tensor of same shape as self.get_cov_var(). + """ pass def make_covariance_update_op(self, ema_decay): @@ -343,14 +320,101 @@ class FisherFactor(object): """Create and return update ops corresponding to registered computations.""" pass + @abc.abstractmethod def get_cov(self): + """Get full covariance matrix. + + Returns: + Tensor of shape [n, n]. Represents all parameter-parameter correlations + captured by this FisherFactor. + """ + pass + + def get_cov_var(self): + """Get variable backing this FisherFactor. + + May or may not be the same as self.get_cov() + + Returns: + Variable of shape self._cov_shape. + """ return self._cov + @abc.abstractmethod + def left_multiply(self, x, damping): + """Multiplies 'x' by the damped covariance of this factor. + + Let C be the covariance matrix this factor represents, and + D = C + damping * I be its damped variant. This method calculates + matmul(D, vec(x)). + + Args: + x: Tensor. Represents a single vector. Shape depends on implementation. + damping: 0-D Tensor. Damping to add to C's diagonal. + + Returns: + Tensor of same shape as 'x'. + """ + pass + + @abc.abstractmethod + def right_multiply(self, x, damping): + """Multiplies 'x' by the damped covariance of this factor. + + Let C be the covariance matrix this factor represents, and + D = C + damping * I be its damped variant. This method calculates + matmul(vec(x), D). + + Args: + x: Tensor. Represents a single vector. Shape depends on implementation. + damping: 0-D Tensor. Damping to add to C's diagonal. + + Returns: + Tensor of same shape as 'x'. + """ + pass + + @abc.abstractmethod + def left_multiply_inverse(self, x, damping): + """Multiplies 'x' by damped inverse of this factor. + + Let C be the covariance matrix this factor represents and + E = inv(C + damping * I) be its damped inverse. This method calculates + matmul(E, vec(x)). + + Args: + x: Tensor. Represents a single vector. Shape depends on implementation. + damping: 0-D Tensor. Damping to add to C's diagonal. + + Returns: + Tensor of same shape as 'x'. + """ + pass + + @abc.abstractmethod + def right_multiply_inverse(self, x, damping): + """Multiplies 'x' by damped inverse of this factor. + + Let C be the covariance matrix this factor represents and + E = inv(C + damping * I) be its damped inverse. This method calculates + matmul(vec(x), E). + + Args: + x: Tensor. Represents a single vector. Shape depends on implementation. + damping: 0-D Tensor. Damping to add to C's diagonal. + + Returns: + Tensor of same shape as 'x'. + """ + pass + class InverseProvidingFactor(FisherFactor): - """Base class for FisherFactors that maintain inverses, powers, etc of _cov. + """Base class for FisherFactors that maintain inverses explicitly. - Assumes that the _cov property is a square PSD matrix. + This class explicitly calculates and stores inverses of covariance matrices + provided by the underlying FisherFactor implementation. It is assumed that + vectors can be represented as 2-D matrices. Subclasses must implement the _compute_new_cov method, and the _var_scope and _cov_shape properties. @@ -485,6 +549,61 @@ class InverseProvidingFactor(FisherFactor): def reset_eigendecomp(self): self._eigendecomp = None + def get_cov(self): + # Variable contains full covariance matrix. + return self.get_cov_var() + + def left_multiply(self, x, damping): + n = self.get_cov().shape[0] + damped_cov = self.get_cov() + damping * array_ops.eye(n) + + if isinstance(x, tf_ops.IndexedSlices): + raise NotImplementedError( + "Left-multiply not yet supported for IndexedSlices.") + + if len(x.shape) != 2: + raise ValueError( + "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." + % (x,)) + + return math_ops.matmul(damped_cov, x) + + def right_multiply(self, x, damping): + n = self.get_cov().shape[0] + damped_cov = self.get_cov() + damping * array_ops.eye(n) + + if isinstance(x, tf_ops.IndexedSlices): + return utils.matmul_sparse_dense(x, damped_cov) + + if len(x.shape) != 2: + raise ValueError( + "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." + % (x,)) + + return math_ops.matmul(x, damped_cov) + + def left_multiply_inverse(self, x, damping): + if isinstance(x, tf_ops.IndexedSlices): + raise ValueError("Left-multiply not yet supported for IndexedSlices.") + + if x.shape.ndims != 2: + raise ValueError( + "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." + % (x,)) + + return math_ops.matmul(self.get_damped_inverse(damping), x) + + def right_multiply_inverse(self, x, damping): + if isinstance(x, tf_ops.IndexedSlices): + return utils.matmul_sparse_dense(x, self.get_damped_inverse(damping)) + + if x.shape.ndims != 2: + raise ValueError( + "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." + % (x,)) + + return math_ops.matmul(x, self.get_damped_inverse(damping)) + class FullFactor(InverseProvidingFactor): """FisherFactor for a full matrix representation of the Fisher of a parameter. @@ -530,7 +649,11 @@ class FullFactor(InverseProvidingFactor): class DiagonalFactor(FisherFactor): - """A base class for FisherFactors that use diagonal approximations.""" + """A base class for FisherFactors that use diagonal approximations. + + A DiagonalFactor's covariance variable can be of any shape, but must contain + exactly one entry per parameter. + """ def __init__(self): super(DiagonalFactor, self).__init__() @@ -542,6 +665,45 @@ class DiagonalFactor(FisherFactor): def make_inverse_update_ops(self): return [] + def get_cov(self): + # self.get_cov() could be any shape, but it must have one entry per + # parameter. Flatten it into a vector. + cov_diag_vec = array_ops.reshape(self.get_cov_var(), [-1]) + return array_ops.diag(cov_diag_vec) + + def left_multiply(self, x, damping): + damped_cov = self.get_cov_var() + damping + if isinstance(x, tf_ops.IndexedSlices): + return utils.matmul_diag_sparse(array_ops.reshape(damped_cov, [-1]), x) + + if x.shape != damped_cov.shape: + raise ValueError("x (%s) and cov (%s) must have same shape." % + (x, damped_cov)) + + return damped_cov * x + + def right_multiply(self, x, damping): + raise NotImplementedError("Only left-multiply is currently supported.") + + def left_multiply_inverse(self, x, damping): + inverse = 1. / (self.get_cov_var() + damping) + + if isinstance(x, tf_ops.IndexedSlices): + return utils.matmul_diag_sparse(array_ops.reshape(inverse, [-1]), x) + + if x.shape != inverse.shape: + raise ValueError("x (%s) and cov (%s) must have same shape." % + (x, inverse)) + + return inverse * x + + def right_multiply_inverse(self, x, damping): + raise NotImplementedError("Only left-multiply is currently supported.") + + def register_damped_inverse(self, damping): + # DiagonalFactors don't keep explicit inverses. + pass + class NaiveDiagonalFactor(DiagonalFactor): """FisherFactor for a diagonal approximation of any type of param's Fisher. @@ -553,6 +715,14 @@ class NaiveDiagonalFactor(DiagonalFactor): def __init__(self, params_grads, batch_size): + """Initializes NaiveDiagonalFactor instance. + + Args: + params_grads: Sequence of Tensors, each with same shape as parameters this + FisherFactor corresponds to. For example, the gradient of the loss with + respect to parameters. + batch_size: int or 0-D Tensor. Size + """ self._params_grads = tuple(utils.ensure_sequence(params_grad) for params_grad in params_grads) self._batch_size = batch_size @@ -567,7 +737,7 @@ class NaiveDiagonalFactor(DiagonalFactor): def _cov_shape(self): size = sum(param_grad.shape.num_elements() for param_grad in self._params_grads[0]) - return (size, 1) + return [size, 1] @property def _num_sources(self): @@ -584,6 +754,84 @@ class NaiveDiagonalFactor(DiagonalFactor): self._batch_size, params_grads_flat.dtype)) +class EmbeddingInputKroneckerFactor(DiagonalFactor): + r"""FisherFactor for input to an embedding layer. + + Given input_ids = [batch_size, input_size] representing indices into an + [vocab_size, embedding_size] embedding matrix, approximate input covariance by + a diagonal matrix, + + Cov(input_ids, input_ids) = + (1/batch_size) sum_{i} diag(n_hot(input[i]) ** 2). + + where n_hot() constructs an n-hot binary vector and diag() constructs a + diagonal matrix of size [vocab_size, vocab_size]. + """ + + def __init__(self, input_ids, vocab_size, dtype=None): + """Instantiate EmbeddingInputKroneckerFactor. + + Args: + input_ids: Tuple of Tensors of shape [batch_size, input_size] and dtype + int32. Indices into embedding matrix. + vocab_size: int or 0-D Tensor. Maximum value for entries in 'input_ids'. + dtype: dtype for covariance statistics. Must be a floating point type. + Defaults to float32. + """ + self._input_ids = input_ids + self._vocab_size = vocab_size + self._cov_dtype = dtype or dtypes.float32 + + super(EmbeddingInputKroneckerFactor, self).__init__() + + @property + def _var_scope(self): + return "ff_diag_embedding/" + scope_string_from_params(self._input_ids) + + @property + def _cov_shape(self): + return [self._vocab_size] + + @property + def _num_sources(self): + return len(self._input_ids) + + @property + def _dtype(self): + return self._cov_dtype + + def _compute_new_cov(self, idx=0): + with maybe_colocate_with(self._input_ids): + input_ids = self._input_ids[idx] + if len(input_ids.shape) > 2: + raise ValueError( + "Input to embeddings must have rank <= 2. Found rank %d." % len( + input_ids.shape)) + + batch_size = array_ops.shape(input_ids)[0] + + # Transform indices into one-hot vectors. + # + # TODO(b/72714822): There must be a faster way to construct the diagonal + # covariance matrix! This operation is O(batch_size * vocab_size), where + # it should be O(batch_size * input_size). + flat_input_ids = array_ops.reshape(input_ids, [-1]) + one_hots = array_ops.one_hot(flat_input_ids, + self._vocab_size) # [?, vocab_size] + + # Take average across examples. Note that, because all entries have + # magnitude zero or one, there's no need to square the entries. + # + # TODO(b/72714822): Support for SparseTensor, other kinds of aggregation + # within an example such as average. + # + # TODO(b/72714822): Support for partitioned embeddings. + new_cov = math_ops.reduce_sum(one_hots, axis=0) # [vocab_size] + new_cov /= math_ops.cast(batch_size, new_cov.dtype) + + return new_cov + + class FullyConnectedDiagonalFactor(DiagonalFactor): r"""FisherFactor for a diagonal approx of a fully-connected layer's Fisher. @@ -623,8 +871,9 @@ class FullyConnectedDiagonalFactor(DiagonalFactor): @property def _cov_shape(self): - return [self._inputs.shape[1] + self._has_bias, - self._outputs_grads[0].shape[1]] + input_size = self._inputs.shape[1] + self._has_bias + output_size = self._outputs_grads[0].shape[1] + return [input_size, output_size] @property def _num_sources(self): @@ -717,10 +966,11 @@ class ConvDiagonalFactor(DiagonalFactor): # TODO(b/64144716): there is potential here for a big savings in terms # of memory use. - patches = extract_image_patches( + patches = array_ops.extract_image_patches( self._inputs, - ksizes=[filter_height, filter_width], - strides=self._strides[1:-1], + ksizes=[1, filter_height, filter_width, 1], + strides=self._strides, + rates=[1, 1, 1, 1], padding=self._padding) if self._has_bias: @@ -864,10 +1114,11 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): # TODO(b/64144716): there is potential here for a big savings in terms of # memory use. - patches = extract_image_patches( + patches = array_ops.extract_image_patches( self._inputs, - ksizes=[filter_height, filter_width], - strides=self._strides[1:-1], + ksizes=[1, filter_height, filter_width, 1], + strides=self._strides, + rates=[1, 1, 1, 1], padding=self._padding) flatten_size = (filter_height * filter_width * in_channels) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py b/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py index ad93919149..2d8e378a93 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py @@ -24,26 +24,15 @@ from tensorflow.python.util.all_util import remove_undocumented # pylint: enable=unused-import,line-too-long,wildcard-import _allowed_symbols = [ - "inverse_initializer", - "covariance_initializer", - "diagonal_covariance_initializer", - "scope_string_from_params", - "scope_string_from_name", - "scalar_or_tensor_to_string", - "FisherFactor", - "InverseProvidingFactor", - "FullFactor", - "DiagonalFactor", - "NaiveDiagonalFactor", - "FullyConnectedDiagonalFactor", - "FullyConnectedKroneckerFactor", - "ConvInputKroneckerFactor", - "ConvOutputKroneckerFactor", - "ConvDiagonalFactor", - "set_global_constants", - "maybe_colocate_with", - "compute_cov", - "append_homog" + "inverse_initializer", "covariance_initializer", + "diagonal_covariance_initializer", "scope_string_from_params", + "scope_string_from_name", "scalar_or_tensor_to_string", "FisherFactor", + "InverseProvidingFactor", "FullFactor", "DiagonalFactor", + "NaiveDiagonalFactor", "EmbeddingInputKroneckerFactor", + "FullyConnectedDiagonalFactor", "FullyConnectedKroneckerFactor", + "ConvInputKroneckerFactor", "ConvOutputKroneckerFactor", + "ConvDiagonalFactor", "set_global_constants", "maybe_colocate_with", + "compute_cov", "append_homog" ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 8d450f04f3..ce9005b9ce 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -143,6 +143,7 @@ class LayerCollection(object): self._loss_dict = {} # {str: LossFunction} self._subgraph = None self._default_generic_approximation = APPROX_FULL_NAME + self._default_embedding_approximation = APPROX_KRONECKER_NAME self._default_fully_connected_approximation = APPROX_KRONECKER_NAME self._default_convolution_2d_approximation = APPROX_KRONECKER_NAME self._default_fully_connected_multi_approximation = ( @@ -178,6 +179,17 @@ class LayerCollection(object): """ return self._linked_parameters + @property + def default_embedding_approximation(self): + return self._default_embedding_approximation + + def set_default_embedding_approximation(self, value): + if value != APPROX_KRONECKER_NAME: + raise ValueError( + "{} is not a valid approximation for embedding variables.".format( + value)) + self._default_embedding_approximation = value + @property def default_generic_approximation(self): return self._default_generic_approximation @@ -417,6 +429,46 @@ class LayerCollection(object): else: return None + def register_embedding(self, + params, + inputs, + outputs, + approx=None, + reuse=VARIABLE_SCOPE): + """Registers a fully connnected layer. + + Args: + params: Embedding matrix of shape [vocab_size, embedding_size]. + inputs: Tensor of shape [batch_size, input_size] and dtype int32. Indices + into embedding matrix. + outputs: Tensor of shape [batch_size, output_size]. Outputs + produced by layer. + approx: str. Must be "kron". + reuse: bool or str. If True, reuse an existing FisherBlock. If False, + create a new FisherBlock. If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. + + Raises: + ValueError: For improper value to 'approx'. + KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: If reuse == True and FisherBlock found but of the wrong type. + """ + if approx is None: + approx = self._get_linked_approx(params) + if approx is None: + approx = self.default_embedding_approximation + + if approx != APPROX_KRONECKER_NAME: + raise ValueError("Bad value {} for approx.".format(approx)) + + if isinstance(params, (tuple, list)): + raise ValueError("Bias not supported.") + + vocab_size = int(params.shape[0]) + block = self.register_block( + params, fb.EmbeddingKFACFB(self, vocab_size), reuse=reuse) + block.register_additional_minibatch(inputs, outputs) + def register_fully_connected(self, params, inputs, diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py index e89508fa46..f5bd97cb4e 100644 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ b/tensorflow/contrib/kfac/python/ops/utils.py @@ -144,7 +144,9 @@ def layer_params_to_mat2d(vector): [-1, w_part.shape.as_list()[-1]]) return array_ops.concat( (w_part_reshaped, array_ops.reshape(b_part, [1, -1])), axis=0) - else: + elif isinstance(vector, ops.IndexedSlices): + return vector + else: # Tensor or Tensor-like. return array_ops.reshape(vector, [-1, vector.shape.as_list()[-1]]) @@ -163,6 +165,11 @@ def mat2d_to_layer_params(vector_template, mat2d): if isinstance(vector_template, (tuple, list)): w_part, b_part = mat2d[:-1], mat2d[-1] return array_ops.reshape(w_part, vector_template[0].shape), b_part + elif isinstance(vector_template, ops.IndexedSlices): + if not isinstance(mat2d, ops.IndexedSlices): + raise TypeError( + "If vector_template is an IndexedSlices, so should mat2d.") + return mat2d else: return array_ops.reshape(mat2d, vector_template.shape) @@ -420,5 +427,57 @@ def batch_execute(global_step, thunks, batch_size, name=None): return result +def matmul_sparse_dense(A, B, name=None): # pylint: disable=invalid-name + """Computes matmul(A, B) where A is sparse, B is dense. + + Args: + A: tf.IndexedSlices with dense shape [m, n]. + B: tf.Tensor with shape [n, k]. + name: str. Name of op. + + Returns: + tf.IndexedSlices resulting from matmul(A, B). + + Raises: + ValueError: If A doesn't represent a matrix. + ValueError: If B is not rank-2. + """ + with ops.name_scope(name, "matmul_sparse_dense", [A, B]): + if A.indices.shape.ndims != 1 or A.values.shape.ndims != 2: + raise ValueError("A must represent a matrix. Found: %s." % A) + if B.shape.ndims != 2: + raise ValueError("B must be a matrix.") + new_values = math_ops.matmul(A.values, B) + return ops.IndexedSlices( + new_values, + A.indices, + dense_shape=array_ops.stack([A.dense_shape[0], new_values.shape[1]])) + + +def matmul_diag_sparse(A_diag, B, name=None): # pylint: disable=invalid-name + """Computes matmul(A, B) where A is a diagonal matrix, B is sparse. + + Args: + A_diag: diagonal entries of matrix A of shape [m, m]. + B: tf.IndexedSlices. Represents matrix of shape [m, n]. + name: str. Name of op. + + Returns: + tf.IndexedSlices resulting from matmul(A, B). + + Raises: + ValueError: If A_diag is not rank-1. + ValueError: If B doesn't represent a matrix. + """ + with ops.name_scope(name, "matmul_diag_sparse", [A_diag, B]): + A_diag = ops.convert_to_tensor(A_diag) + if A_diag.shape.ndims != 1: + raise ValueError("A_diag must be a rank-1 Tensor.") + if B.indices.shape.ndims != 1 or B.values.shape.ndims != 2: + raise ValueError("B must represent a matrix. Found: %s." % B) + a = array_ops.gather(A_diag, B.indices) + a = array_ops.reshape(a, list(a.shape) + [1] * (B.values.shape.ndims - 1)) + return ops.IndexedSlices(a * B.values, B.indices, dense_shape=B.dense_shape) + # TODO(b/69623235): Add a function for finding tensors that share gradients # to eliminate redundant fisher factor computations. diff --git a/tensorflow/contrib/kfac/python/ops/utils_lib.py b/tensorflow/contrib/kfac/python/ops/utils_lib.py index fe8e39c212..8e424a7946 100644 --- a/tensorflow/contrib/kfac/python/ops/utils_lib.py +++ b/tensorflow/contrib/kfac/python/ops/utils_lib.py @@ -40,6 +40,8 @@ _allowed_symbols = [ "fwd_gradients", "ensure_sequence", "batch_execute", + "matmul_sparse_dense", + "matmul_diag_sparse", ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) -- GitLab From 5141c830fea3c8ae68b1d47e37358d50341df866 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Thu, 15 Feb 2018 16:27:38 -0800 Subject: [PATCH 248/629] Lint fix --- tensorflow/examples/image_retraining/retrain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index 8d2e4a07f0..cae6948f20 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -1043,7 +1043,7 @@ def export_model(sess, architecture, saved_model_dir): builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) builder.add_meta_graph_and_variables( sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map = { + signature_def_map={ tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature }, -- GitLab From 45a86d1acac8d5a3a6f159c9f5e186499e3d179a Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Thu, 15 Feb 2018 16:33:10 -0800 Subject: [PATCH 249/629] Lint fix --- tensorflow/python/estimator/estimator_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 1af331697e..641888d229 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -1291,7 +1291,8 @@ class EstimatorEvaluateTest(test.TestCase): writer_cache.FileWriterCache.clear() # Get last evaluation Event written. - if check_eventfile_for_keyword('image', os.path.join(est.model_dir, 'eval')): + if check_eventfile_for_keyword('image', + os.path.join(est.model_dir, 'eval')): return self.fail('{} should be part of reported summaries.'.format('image')) -- GitLab From 71878e136473e4ea2d85593171fcf221d3bced2a Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Thu, 15 Feb 2018 16:38:46 -0800 Subject: [PATCH 250/629] Update remove_control_dependencies.cc --- .../tools/graph_transforms/remove_control_dependencies.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc index ba6df633be..cba6b78fc5 100644 --- a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc +++ b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include -#include -#include +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/tools/graph_transforms/transform_utils.h" namespace tensorflow { namespace graph_transforms { -- GitLab From 3fa24cecf1a3486406a5c1d2af3452aba53f6686 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Thu, 15 Feb 2018 16:40:24 -0800 Subject: [PATCH 251/629] Adding Shape inference functions to infeed ops. PiperOrigin-RevId: 185923685 --- tensorflow/contrib/tpu/ops/infeed_ops.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/tpu/ops/infeed_ops.cc b/tensorflow/contrib/tpu/ops/infeed_ops.cc index 849c4a1102..efc546f9a6 100644 --- a/tensorflow/contrib/tpu/ops/infeed_ops.cc +++ b/tensorflow/contrib/tpu/ops/infeed_ops.cc @@ -41,6 +41,7 @@ REGISTER_OP("InfeedEnqueue") .Attr("dtype: type") .Attr("shape: shape = {}") .Attr("device_ordinal: int = -1") + .SetShapeFn(shape_inference::NoOutputs) .SetIsStateful() .Doc(R"doc( An op which feeds a single Tensor value into the computation. @@ -58,6 +59,7 @@ REGISTER_OP("InfeedEnqueueTuple") .Attr("dtypes: list(type)") .Attr("shapes: list(shape)") .Attr("device_ordinal: int = -1") + .SetShapeFn(shape_inference::NoOutputs) .SetIsStateful() .Doc(R"doc( An op which feeds multiple Tensor values into the computation as an XLA tuple. -- GitLab From af1cf84725cb776623fc42b275b965dfe452ce72 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 15 Feb 2018 17:02:12 -0800 Subject: [PATCH 252/629] Fixes broken test PiperOrigin-RevId: 185926797 --- .../kernel_tests/cache_dataset_op_test.py | 60 ++++++++++--------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py b/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py index b71652c980..02720a2e98 100644 --- a/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py @@ -28,6 +28,7 @@ from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -202,44 +203,45 @@ class FilesystemCacheDatasetTest(test.TestCase): class MemoryCacheDatasetTest(test.TestCase): def testCacheDatasetPassthrough(self): - repeat_count = variables.Variable(constant_op.constant(10, dtypes.int64)) - dataset = dataset_ops.Dataset.range(3).flat_map( - lambda x: dataset_ops.Dataset.from_tensors(x).repeat(repeat_count)) + with ops.device("cpu:0"): + repeat_count = variables.Variable(constant_op.constant(10, dtypes.int64)) + dataset = dataset_ops.Dataset.range(3).flat_map( + lambda x: dataset_ops.Dataset.from_tensors(x).repeat(repeat_count)) - cached_dataset = dataset.cache().repeat(2) - uncached_dataset = dataset.repeat(2) + cached_dataset = dataset.cache().repeat(2) + uncached_dataset = dataset.repeat(2) - # Needs to be initializable to capture the variable. - cached_iterator = cached_dataset.make_initializable_iterator() - cached_next = cached_iterator.get_next() - uncached_iterator = uncached_dataset.make_initializable_iterator() - uncached_next = uncached_iterator.get_next() + # Needs to be initializable to capture the variable. + cached_iterator = cached_dataset.make_initializable_iterator() + cached_next = cached_iterator.get_next() + uncached_iterator = uncached_dataset.make_initializable_iterator() + uncached_next = uncached_iterator.get_next() - with self.test_session() as sess: + with self.test_session() as sess: - sess.run(repeat_count.initializer) - sess.run(cached_iterator.initializer) - sess.run(uncached_iterator.initializer) + sess.run(repeat_count.initializer) + sess.run(cached_iterator.initializer) + sess.run(uncached_iterator.initializer) - for i in range(3): - for _ in range(10): - self.assertEqual(sess.run(cached_next), i) - self.assertEqual(sess.run(uncached_next), i) + for i in range(3): + for _ in range(10): + self.assertEqual(sess.run(cached_next), i) + self.assertEqual(sess.run(uncached_next), i) - sess.run(repeat_count.assign(0)) + sess.run(repeat_count.assign(0)) - # The uncached iterator should now be empty. - with self.assertRaises(errors.OutOfRangeError): - sess.run(uncached_next) + # The uncached iterator should now be empty. + with self.assertRaises(errors.OutOfRangeError): + sess.run(uncached_next) - # The cached iterator replays from cache. - for i in range(3): - for _ in range(10): - self.assertEqual(sess.run(cached_next), i) + # The cached iterator replays from cache. + for i in range(3): + for _ in range(10): + self.assertEqual(sess.run(cached_next), i) - # The cached iterator should now be empty. - with self.assertRaises(errors.OutOfRangeError): - sess.run(cached_next) + # The cached iterator should now be empty. + with self.assertRaises(errors.OutOfRangeError): + sess.run(cached_next) def testEmptyCacheReading(self): components = (np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8]), -- GitLab From f5c581f0f4649898ed00650fe98c7ef344e0f240 Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Thu, 15 Feb 2018 17:05:41 -0800 Subject: [PATCH 253/629] Use np.frombuffer instead of np.fromstring to avoid DeprecationWarning. Resolves #17020 PiperOrigin-RevId: 185927310 --- tensorflow/python/framework/tensor_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 0e5f696111..cbba112841 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -557,7 +557,7 @@ def MakeNdarray(tensor): dtype = tensor_dtype.as_numpy_dtype if tensor.tensor_content: - return np.fromstring(tensor.tensor_content, dtype=dtype).reshape(shape) + return np.frombuffer(tensor.tensor_content, dtype=dtype).reshape(shape) elif tensor_dtype == dtypes.float16: # the half_val field of the TensorProto stores the binary representation # of the fp16: we need to reinterpret this as a proper float16 -- GitLab From efe7f7b1b671f66bc8aacebe3be07742e4c429d0 Mon Sep 17 00:00:00 2001 From: Deron Eriksson Date: Thu, 15 Feb 2018 17:14:03 -0800 Subject: [PATCH 254/629] Fix typos in low-level introduction documentation Remove extraneous comma. Capitalize 'Loss' title. Add missing space to 'minimize the'. --- tensorflow/docs_src/programmers_guide/low_level_intro.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/low_level_intro.md b/tensorflow/docs_src/programmers_guide/low_level_intro.md index 8f6d3fbd46..234f0493e3 100644 --- a/tensorflow/docs_src/programmers_guide/low_level_intro.md +++ b/tensorflow/docs_src/programmers_guide/low_level_intro.md @@ -295,7 +295,7 @@ the same input. @{tf.layers$Layers} are the preferred way to add trainable parameters to a graph. Layers package together both the variables and the operations that act -on them, . For example a +on them. For example a [densely-connected layer](https://developers.google.com/machine-learning/glossary/#fully_connected_layer) performs a weighted sum across all inputs for each output and applies an optional @@ -478,7 +478,7 @@ good. Here's what we got; your own output will almost certainly differ: [ 0.10527515]] ``` -### loss +### Loss To optimize a model, you first need to define the loss. We'll use the mean square error, a standard loss for regression problems. @@ -504,7 +504,7 @@ TensorFlow provides [**optimizers**](https://developers.google.com/machine-learning/glossary/#optimizer) implementing standard optimization algorithms. These are implemented as sub-classes of @{tf.train.Optimizer}. They incrementally change each -variable in order to minimizethe loss. The simplest optimization algorithm is +variable in order to minimize the loss. The simplest optimization algorithm is [**gradient descent**](https://developers.google.com/machine-learning/glossary/#gradient_descent), implemented by @{tf.train.GradientDescentOptimizer}. It modifies each variable according to the magnitude of the derivative of loss with respect to -- GitLab From 11f1e50886f91ce2caa6e53b0bc9a1e82abdda8e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 17:38:55 -0800 Subject: [PATCH 255/629] Keep the results below 2^31 in exp() test to avoid overflowing. PiperOrigin-RevId: 185931075 --- tensorflow/contrib/lite/testing/generate_examples.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 1ced3bfd73..944031da24 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -775,7 +775,8 @@ def make_exp_tests(zip_path): def build_inputs(parameters, sess, inputs, outputs): values = [ - create_tensor_data(parameters["input_dtype"], parameters["input_shape"]) + create_tensor_data(parameters["input_dtype"], parameters["input_shape"], + min_value=-100, max_value=9) ] return values, sess.run(outputs, feed_dict=dict(zip(inputs, values))) -- GitLab From 33071159b30278d9e5a1802480c03e5029fa4c93 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 17:44:59 -0800 Subject: [PATCH 256/629] Address timeout of conv_ops_test. PiperOrigin-RevId: 185931585 --- tensorflow/core/kernels/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 523e395699..cee3c55d1a 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1040,7 +1040,7 @@ tf_cc_test( tf_cc_test( name = "conv_ops_test", - size = "small", + size = "medium", srcs = ["conv_ops_test.cc"], deps = [ ":conv_ops", -- GitLab From b476a6eca15c9952293878728a2d0105e4223ac0 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 15 Feb 2018 18:21:11 -0800 Subject: [PATCH 257/629] Add stateful metrics support in tf.keras. PiperOrigin-RevId: 185935092 --- .../python/keras/_impl/keras/__init__.py | 2 +- .../python/keras/_impl/keras/callbacks.py | 38 ++++- .../keras/_impl/keras/engine/training.py | 140 +++++++++++------- .../python/keras/_impl/keras/metrics.py | 17 ++- .../python/keras/_impl/keras/metrics_test.py | 71 +++++++++ .../api/golden/tensorflow.keras.-model.pbtxt | 2 +- ...sorflow.keras.callbacks.-base-logger.pbtxt | 2 +- ...flow.keras.callbacks.-progbar-logger.pbtxt | 2 +- .../api/golden/tensorflow.keras.metrics.pbtxt | 2 +- .../tensorflow.keras.models.-model.pbtxt | 2 +- 10 files changed, 208 insertions(+), 70 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/__init__.py b/tensorflow/python/keras/_impl/keras/__init__.py index 7311353932..b63907b2e6 100644 --- a/tensorflow/python/keras/_impl/keras/__init__.py +++ b/tensorflow/python/keras/_impl/keras/__init__.py @@ -40,4 +40,4 @@ from tensorflow.python.keras._impl.keras.layers import Input from tensorflow.python.keras._impl.keras.models import Model from tensorflow.python.keras._impl.keras.models import Sequential -__version__ = '2.1.3-tf' +__version__ = '2.1.4-tf' diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py b/tensorflow/python/keras/_impl/keras/callbacks.py index de013c7c3f..f6c4661425 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks.py +++ b/tensorflow/python/keras/_impl/keras/callbacks.py @@ -164,7 +164,7 @@ class CallbackList(object): class Callback(object): """Abstract base class used to build new callbacks. - # Properties + Attributes: params: dict. Training parameters (eg. verbosity, batch size, number of epochs...). model: instance of `keras.models.Model`. @@ -222,8 +222,18 @@ class BaseLogger(Callback): """Callback that accumulates epoch averages of metrics. This callback is automatically applied to every Keras model. + + Arguments: + stateful_metrics: Iterable of string names of metrics that + should *not* be averaged over an epoch. + Metrics in this list will be logged as-is in `on_epoch_end`. + All others will be averaged in `on_epoch_end`. """ + def __init__(self, stateful_metrics=None): + super(BaseLogger, self).__init__() + self.stateful_metrics = set(stateful_metrics or []) + def on_epoch_begin(self, epoch, logs=None): self.seen = 0 self.totals = {} @@ -234,17 +244,23 @@ class BaseLogger(Callback): self.seen += batch_size for k, v in logs.items(): - if k in self.totals: - self.totals[k] += v * batch_size + if k in self.stateful_metrics: + self.totals[k] = v else: - self.totals[k] = v * batch_size + if k in self.totals: + self.totals[k] += v * batch_size + else: + self.totals[k] = v * batch_size def on_epoch_end(self, epoch, logs=None): if logs is not None: for k in self.params['metrics']: if k in self.totals: # Make value available to next callbacks. - logs[k] = self.totals[k] / self.seen + if k in self.stateful_metrics: + logs[k] = self.totals[k] + else: + logs[k] = self.totals[k] / self.seen @tf_export('keras.callbacks.TerminateOnNaN') @@ -272,12 +288,16 @@ class ProgbarLogger(Callback): count_mode: One of "steps" or "samples". Whether the progress bar should count samples seen or steps (batches) seen. + stateful_metrics: Iterable of string names of metrics that + should *not* be averaged over an epoch. + Metrics in this list will be logged as-is. + All others will be averaged over time (e.g. loss, etc). Raises: ValueError: In case of invalid `count_mode`. """ - def __init__(self, count_mode='samples'): + def __init__(self, count_mode='samples', stateful_metrics=None): super(ProgbarLogger, self).__init__() if count_mode == 'samples': self.use_steps = False @@ -285,6 +305,7 @@ class ProgbarLogger(Callback): self.use_steps = True else: raise ValueError('Unknown `count_mode`: ' + str(count_mode)) + self.stateful_metrics = set(stateful_metrics or []) def on_train_begin(self, logs=None): self.verbose = self.params['verbose'] @@ -298,7 +319,10 @@ class ProgbarLogger(Callback): else: target = self.params['samples'] self.target = target - self.progbar = Progbar(target=self.target, verbose=self.verbose) + self.progbar = Progbar( + target=self.target, + verbose=self.verbose, + stateful_metrics=self.stateful_metrics) self.seen = 0 def on_batch_begin(self, batch, logs=None): diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index a71f371b8e..fd14bf3d05 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -31,6 +31,7 @@ from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module from tensorflow.python.keras._impl.keras import optimizers from tensorflow.python.keras._impl.keras.engine import training_eager +from tensorflow.python.keras._impl.keras.engine.topology import Layer from tensorflow.python.keras._impl.keras.engine.topology import Network from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer @@ -274,7 +275,7 @@ def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): losses.categorical_crossentropy } for y, loss, shape in zip(targets, loss_fns, output_shapes): - if loss is None: + if y is None or loss is None: continue if loss is losses.categorical_crossentropy: if y.shape[-1] == 1: @@ -487,7 +488,7 @@ def _standardize_weights(y, raise ValueError('`class_weight` not supported for ' '3+ dimensional targets.') if y.shape[1] > 1: - y_classes = y.argmax(axis=1) + y_classes = np.argmax(y, axis=1) elif y.shape[1] == 1: y_classes = np.reshape(y, y.shape[0]) else: @@ -519,7 +520,7 @@ class Model(Network): def compile(self, optimizer, - loss, + loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, @@ -581,7 +582,7 @@ class Model(Network): self.optimizer = optimizers.get(optimizer) self.loss = loss - self.metrics = metrics + self.metrics = metrics or [] self.loss_weights = loss_weights if context.in_eager_mode() and sample_weight_mode is not None: raise ValueError('sample_weight_mode is not supported in Eager mode.') @@ -817,7 +818,6 @@ class Model(Network): self._feed_sample_weight_modes.append(self.sample_weight_modes[i]) # Prepare metrics. - self.metrics = metrics self.weighted_metrics = weighted_metrics self.metrics_names = ['loss'] self.metrics_tensors = [] @@ -860,14 +860,8 @@ class Model(Network): nested_metrics = _collect_metrics(metrics, self.output_names) nested_weighted_metrics = _collect_metrics(weighted_metrics, self.output_names) - - def append_metric(layer_index, metric_name, metric_tensor): - """Helper function used in loop below.""" - if len(self.output_names) > 1: - metric_name = self.output_names[layer_index] + '_' + metric_name - self.metrics_names.append(metric_name) - self.metrics_tensors.append(metric_tensor) - + self.metrics_updates = [] + self.stateful_metric_names = [] with K.name_scope('metrics'): for i in range(len(self.outputs)): if i in skip_target_indices: @@ -886,42 +880,65 @@ class Model(Network): if metric in ('accuracy', 'acc', 'crossentropy', 'ce'): # custom handling of accuracy/crossentropy # (because of class mode duality) - output_shape = K.int_shape(self.outputs[i]) + output_shape = self.outputs[i].get_shape().as_list() if (output_shape[-1] == 1 or self.loss_functions[i] == losses.binary_crossentropy): # case: binary accuracy/crossentropy if metric in ('accuracy', 'acc'): - acc_fn = metrics_module.binary_accuracy + metric_fn = metrics_module.binary_accuracy elif metric in ('crossentropy', 'ce'): - acc_fn = metrics_module.binary_crossentropy + metric_fn = metrics_module.binary_crossentropy elif self.loss_functions[ i] == losses.sparse_categorical_crossentropy: # case: categorical accuracy/crossentropy with sparse targets if metric in ('accuracy', 'acc'): - acc_fn = metrics_module.sparse_categorical_accuracy + metric_fn = metrics_module.sparse_categorical_accuracy elif metric in ('crossentropy', 'ce'): - acc_fn = metrics_module.sparse_categorical_crossentropy + metric_fn = metrics_module.sparse_categorical_crossentropy else: # case: categorical accuracy/crossentropy if metric in ('accuracy', 'acc'): - acc_fn = metrics_module.categorical_accuracy + metric_fn = metrics_module.categorical_accuracy elif metric in ('crossentropy', 'ce'): - acc_fn = metrics_module.categorical_crossentropy + metric_fn = metrics_module.categorical_crossentropy if metric in ('accuracy', 'acc'): suffix = 'acc' elif metric in ('crossentropy', 'ce'): suffix = 'ce' - weighted_metric_fn = _weighted_masked_objective(acc_fn) + weighted_metric_fn = _weighted_masked_objective(metric_fn) metric_name = metric_name_prefix + suffix else: metric_fn = metrics_module.get(metric) weighted_metric_fn = _weighted_masked_objective(metric_fn) - metric_name = metric_name_prefix + metric_fn.__name__ + # Get metric name as string + if hasattr(metric_fn, 'name'): + metric_name = metric_fn.name + else: + metric_name = metric_fn.__name__ + metric_name = metric_name_prefix + metric_name with K.name_scope(metric_name): metric_result = weighted_metric_fn( y_true, y_pred, weights=weights, mask=masks[i]) - append_metric(i, metric_name, metric_result) + + # Append to self.metrics_names, self.metric_tensors, + # self.stateful_metric_names + if len(self.output_names) > 1: + metric_name = '%s_%s' % (self.output_names[i], metric_name) + # Dedupe name + j = 1 + base_metric_name = metric_name + while metric_name in self.metrics_names: + metric_name = '%s_%d' % (base_metric_name, j) + j += 1 + self.metrics_names.append(metric_name) + self.metrics_tensors.append(metric_result) + + # Keep track of state updates created by + # stateful metrics (i.e. metrics layers). + if isinstance(metric_fn, Layer): + self.stateful_metric_names.append(metric_name) + self.metrics_updates += metric_fn.updates handle_metrics(output_metrics) handle_metrics(output_weighted_metrics, weights=weights) @@ -986,6 +1003,8 @@ class Model(Network): updates += self.get_updates_for(None) # Conditional updates relevant to this model updates += self.get_updates_for(self._feed_inputs) + # Stateful metrics updates + updates += self.metrics_updates # Gets loss and metrics. Updates weights at each call. self.train_function = K.function( inputs, [self.total_loss] + self.metrics_tensors, @@ -1006,7 +1025,7 @@ class Model(Network): # Does update the network states. self.test_function = K.function( inputs, [self.total_loss] + self.metrics_tensors, - updates=self.state_updates, + updates=self.state_updates + self.metrics_updates, name='test_function', **self._function_kwargs) @@ -1145,14 +1164,18 @@ class Model(Network): index_array = np.arange(num_train_samples) self.history = cbks.History() - callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history] + all_callbacks = [cbks.BaseLogger( + stateful_metrics=self.stateful_metric_names)] if verbose: if steps_per_epoch is not None: count_mode = 'steps' else: count_mode = 'samples' - callbacks += [cbks.ProgbarLogger(count_mode)] - callbacks = cbks.CallbackList(callbacks) + all_callbacks.append( + cbks.ProgbarLogger( + count_mode, stateful_metrics=self.stateful_metric_names)) + all_callbacks += (callbacks or []) + [self.history] + callbacks = cbks.CallbackList(all_callbacks) out_labels = out_labels or [] # it's possible to callback a different model than self @@ -1186,6 +1209,11 @@ class Model(Network): indices_for_conversion_to_dense.append(i) for epoch in range(initial_epoch, epochs): + # Reset stateful metrics + for m in self.metrics: + if isinstance(m, Layer): + m.reset_states() + # Update callbacks callbacks.on_epoch_begin(epoch) epoch_logs = {} if steps_per_epoch is not None: @@ -1286,12 +1314,19 @@ class Model(Network): or list of arrays of predictions (if the model has multiple outputs). """ + if hasattr(self, 'metrics'): + for m in self.metrics: + if isinstance(m, Layer): + m.reset_states() + num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') if verbose == 1: if steps is not None: - progbar = Progbar(target=steps) + progbar = Progbar(target=steps, + stateful_metrics=self.stateful_metric_names) else: - progbar = Progbar(target=num_samples) + progbar = Progbar(target=num_samples, + stateful_metrics=self.stateful_metric_names) indices_for_conversion_to_dense = [] for i in range(len(self._feed_inputs)): @@ -1373,6 +1408,17 @@ class Model(Network): and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. """ + if hasattr(self, 'metrics'): + for m in self.metrics: + if isinstance(m, Layer): + m.reset_states() + stateful_metric_indices = [ + i for i, name in enumerate(self.metrics_names) + if str(name) in self.stateful_metric_names + ] + else: + stateful_metric_indices = [] + num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') outs = [] if verbose == 1: @@ -1396,7 +1442,10 @@ class Model(Network): for _ in enumerate(batch_outs): outs.append(0.) for i, batch_out in enumerate(batch_outs): - outs[i] += batch_out + if i in stateful_metric_indices: + outs[i] = batch_out + else: + outs[i] += batch_out else: if step == 0: outs.append(0.) @@ -1404,7 +1453,8 @@ class Model(Network): if verbose == 1: progbar.update(step + 1) for i in range(len(outs)): - outs[i] /= steps + if i not in stateful_metric_indices: + outs[i] /= steps else: batches = make_batches(num_samples, batch_size) index_array = np.arange(num_samples) @@ -1425,7 +1475,10 @@ class Model(Network): for batch_out in enumerate(batch_outs): outs.append(0.) for i, batch_out in enumerate(batch_outs): - outs[i] += batch_out * len(batch_ids) + if i in stateful_metric_indices: + outs[i] = batch_out + else: + outs[i] += batch_out * len(batch_ids) else: if batch_index == 0: outs.append(0.) @@ -1433,7 +1486,8 @@ class Model(Network): if verbose == 1: progbar.update(batch_end) for i in range(len(outs)): - outs[i] /= num_samples + if i not in stateful_metric_indices: + outs[i] /= num_samples if len(outs) == 1: return outs[0] return outs @@ -1655,20 +1709,6 @@ class Model(Network): str(x[0].shape[0]) + ' samples') return x, y, sample_weights - def _get_deduped_metrics_names(self): - out_labels = self.metrics_names - - # Rename duplicated metrics name - # (can happen with an output layer shared among multiple dataflows). - deduped_out_labels = [] - for i, label in enumerate(out_labels): - new_label = label - if out_labels.count(label) > 1: - dup_idx = out_labels[:i].count(label) - new_label += '_' + str(dup_idx + 1) - deduped_out_labels.append(new_label) - return deduped_out_labels - def _set_inputs(self, inputs): """Set model's input and output specs based on the input data received. @@ -1992,7 +2032,7 @@ class Model(Network): ins = x + y + sample_weights # Prepare display labels. - out_labels = self._get_deduped_metrics_names() + out_labels = self.metrics_names if context.in_eager_mode(): if do_validation: @@ -2471,8 +2511,8 @@ class Model(Network): ' the `keras.utils.Sequence` class.') # Prepare display labels. - out_labels = self._get_deduped_metrics_names() - callback_metrics = out_labels + ['val_' + n for n in out_labels] + out_labels = self.metrics_names + callback_metrics = out_labels + ['val_%s' % n for n in out_labels] # prepare callbacks self.history = cbks.History() diff --git a/tensorflow/python/keras/_impl/keras/metrics.py b/tensorflow/python/keras/_impl/keras/metrics.py index 0e2fb6365a..82778a3dc4 100644 --- a/tensorflow/python/keras/_impl/keras/metrics.py +++ b/tensorflow/python/keras/_impl/keras/metrics.py @@ -36,6 +36,7 @@ from tensorflow.python.keras._impl.keras.losses import poisson from tensorflow.python.keras._impl.keras.losses import sparse_categorical_crossentropy from tensorflow.python.keras._impl.keras.losses import squared_hinge from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object +from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object from tensorflow.python.util.tf_export import tf_export @@ -79,13 +80,13 @@ cosine = cosine_proximity @tf_export('keras.metrics.serialize') def serialize(metric): - return metric.__name__ + return serialize_keras_object(metric) @tf_export('keras.metrics.deserialize') -def deserialize(name, custom_objects=None): +def deserialize(config, custom_objects=None): return deserialize_keras_object( - name, + config, module_objects=globals(), custom_objects=custom_objects, printable_module_name='metric function') @@ -93,11 +94,13 @@ def deserialize(name, custom_objects=None): @tf_export('keras.metrics.get') def get(identifier): - if isinstance(identifier, six.string_types): - identifier = str(identifier) - return deserialize(identifier) + if isinstance(identifier, dict): + config = {'class_name': str(identifier), 'config': {}} + return deserialize(config) + elif isinstance(identifier, six.string_types): + return deserialize(str(identifier)) elif callable(identifier): return identifier else: raise ValueError('Could not interpret ' - 'metric function identifier:', identifier) + 'metric function identifier: %s' % identifier) diff --git a/tensorflow/python/keras/_impl/keras/metrics_test.py b/tensorflow/python/keras/_impl/keras/metrics_test.py index f4792f3543..44289ea02a 100644 --- a/tensorflow/python/keras/_impl/keras/metrics_test.py +++ b/tensorflow/python/keras/_impl/keras/metrics_test.py @@ -72,6 +72,77 @@ class KerasMetricsTest(test.TestCase): keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=1)) self.assertEqual(result, 0.) + def test_stateful_metrics(self): + np.random.seed(1334) + + class BinaryTruePositives(keras.layers.Layer): + """Stateful Metric to count the total true positives over all batches. + + Assumes predictions and targets of shape `(samples, 1)`. + + Arguments: + threshold: Float, lower limit on prediction value that counts as a + positive class prediction. + name: String, name for the metric. + """ + + def __init__(self, name='true_positives', **kwargs): + super(BinaryTruePositives, self).__init__(name=name, **kwargs) + self.true_positives = keras.backend.variable(value=0, dtype='int32') + + def reset_states(self): + keras.backend.set_value(self.true_positives, 0) + + def __call__(self, y_true, y_pred): + """Computes the number of true positives in a batch. + + Args: + y_true: Tensor, batch_wise labels + y_pred: Tensor, batch_wise predictions + + Returns: + The total number of true positives seen this epoch at the + completion of the batch. + """ + y_true = keras.backend.cast(y_true, 'int32') + y_pred = keras.backend.cast(keras.backend.round(y_pred), 'int32') + correct_preds = keras.backend.cast( + keras.backend.equal(y_pred, y_true), 'int32') + true_pos = keras.backend.cast( + keras.backend.sum(correct_preds * y_true), 'int32') + current_true_pos = self.true_positives * 1 + self.add_update(keras.backend.update_add(self.true_positives, + true_pos), + inputs=[y_true, y_pred]) + return current_true_pos + true_pos + + metric_fn = BinaryTruePositives() + config = keras.metrics.serialize(metric_fn) + metric_fn = keras.metrics.deserialize( + config, custom_objects={'BinaryTruePositives': BinaryTruePositives}) + + # Test on simple model + inputs = keras.Input(shape=(2,)) + outputs = keras.layers.Dense(1, activation='sigmoid')(inputs) + model = keras.Model(inputs, outputs) + model.compile(optimizer='sgd', + loss='binary_crossentropy', + metrics=['acc', metric_fn]) + + # Test fit, evaluate + samples = 1000 + x = np.random.random((samples, 2)) + y = np.random.randint(2, size=(samples, 1)) + model.fit(x, y, epochs=1, batch_size=10) + outs = model.evaluate(x, y, batch_size=10) + preds = model.predict(x) + + def ref_true_pos(y_true, y_pred): + return np.sum(np.logical_and(y_pred > 0.5, y_true == 1)) + + # Test correctness (e.g. updates should have been run) + self.assertAllClose(outs[2], ref_true_pos(y, preds), atol=1e-5) + if __name__ == '__main__': test.main() diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt index 76cf84084f..a13bfe0a92 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt @@ -144,7 +144,7 @@ tf_class { } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "compute_mask" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-base-logger.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-base-logger.pbtxt index ea4d514354..454823fd23 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-base-logger.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-base-logger.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'stateful_metrics\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "on_batch_begin" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-progbar-logger.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-progbar-logger.pbtxt index 0e6901f28a..543de0ad48 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-progbar-logger.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-progbar-logger.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'count_mode\'], varargs=None, keywords=None, defaults=[\'samples\'], " + argspec: "args=[\'self\', \'count_mode\', \'stateful_metrics\'], varargs=None, keywords=None, defaults=[\'samples\', \'None\'], " } member_method { name: "on_batch_begin" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt index de285c1aab..42729e4237 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt @@ -22,7 +22,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'name\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt index d8d4eb5ca7..f85b328e34 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt @@ -144,7 +144,7 @@ tf_class { } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "compute_mask" -- GitLab From 5315ff2613acaa288ab818d082a95f37f5f05bb4 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 15 Feb 2018 18:22:21 -0800 Subject: [PATCH 258/629] Bug fix and typo fixes. PiperOrigin-RevId: 185935199 --- tensorflow/python/keras/BUILD | 2 +- .../python/keras/_impl/keras/backend.py | 65 ++-- .../keras/_impl/keras/engine/topology.py | 23 +- .../keras/_impl/keras/engine/topology_test.py | 21 + .../keras/_impl/keras/layers/convolutional.py | 8 +- .../_impl/keras/layers/convolutional_test.py | 363 +++++++++--------- .../python/keras/_impl/keras/testing_utils.py | 30 +- tensorflow/python/layers/convolutional.py | 4 +- tensorflow/python/layers/network.py | 2 +- 9 files changed, 283 insertions(+), 235 deletions(-) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index d97a035256..1956478f39 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -395,7 +395,7 @@ py_test( py_test( name = "convolutional_test", - size = "medium", + size = "large", srcs = ["_impl/keras/layers/convolutional_test.py"], srcs_version = "PY2AND3", tags = [ diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index afa183b0a0..1fa264660d 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -258,7 +258,7 @@ def set_image_data_format(data_format): """ global _IMAGE_DATA_FORMAT if data_format not in {'channels_last', 'channels_first'}: - raise ValueError('Unknown data_format:', data_format) + raise ValueError('Unknown data_format: ' + str(data_format)) _IMAGE_DATA_FORMAT = str(data_format) @@ -342,9 +342,6 @@ def learning_phase(): Returns: Learning phase (scalar integer tensor or Python integer). - - Raises: - ValueError: If called when Eager execution is enabled. """ if context.in_eager_mode(): if 'eager' not in _GRAPH_LEARNING_PHASES: @@ -489,7 +486,7 @@ def _get_available_gpus(): def _has_nchw_support(): """Check whether the current scope supports NCHW ops. - Tensorflow does not support NCHW on CPU. Therefore we check if we are not + TensorFlow does not support NCHW on CPU. Therefore we check if we are not explicitly put on CPU, and have GPUs available. In this case there will be soft-placing on the GPU device. @@ -2233,7 +2230,7 @@ def resize_images(x, height_factor, width_factor, data_format): if original_shape[2] is not None else None, None)) return x else: - raise ValueError('Invalid data_format:', data_format) + raise ValueError('Invalid data_format: ' + str(data_format)) @tf_export('keras.backend.resize_volumes') @@ -2265,7 +2262,7 @@ def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): output = repeat_elements(output, width_factor, axis=3) return output else: - raise ValueError('Invalid data_format:', data_format) + raise ValueError('Invalid data_format: ' + str(data_format)) @tf_export('keras.backend.repeat_elements') @@ -2347,7 +2344,7 @@ def arange(start, stop=None, step=1, dtype='int32'): The function arguments use the same convention as Theano's arange: if only one argument is provided, - it is in fact the "stop" argument. + it is in fact the "stop" argument and "start" is 0. The default type of the returned tensor is `'int32'` to match TensorFlow's default. @@ -2362,7 +2359,7 @@ def arange(start, stop=None, step=1, dtype='int32'): An integer tensor. """ - # Match the behavior of numpy and Theano by returning an empty seqence. + # Match the behavior of numpy and Theano by returning an empty sequence. if stop is None and start < 0: start = 0 result = math_ops.range(start, limit=stop, delta=step, name='arange') @@ -2483,7 +2480,7 @@ def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None): if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) if data_format == 'channels_first': pattern = [[0, 0], [0, 0], list(padding[0]), list(padding[1])] @@ -2524,7 +2521,7 @@ def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None): if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) if data_format == 'channels_first': pattern = [[0, 0], [0, 0], [padding[0][0], padding[0][1]], @@ -2797,7 +2794,7 @@ def function(inputs, outputs, updates=None, **kwargs): for key in kwargs: if (key not in tf_inspect.getargspec(session_module.Session.run)[0] and key not in tf_inspect.getargspec(Function.__init__)[0]): - msg = ('Invalid argument "%s" passed to K.function with Tensorflow ' + msg = ('Invalid argument "%s" passed to K.function with TensorFlow ' 'backend') % key raise ValueError(msg) return Function(inputs, outputs, updates=updates, **kwargs) @@ -2916,7 +2913,7 @@ def rnn(step_function, if unroll: if not inputs.get_shape()[0]: - raise ValueError('Unrolling requires a ' 'fixed number of timesteps.') + raise ValueError('Unrolling requires a fixed number of timesteps.') states = initial_states successive_states = [] successive_outputs = [] @@ -3553,7 +3550,7 @@ def _preprocess_conv3d_input(x, data_format): def _preprocess_padding(padding): - """Convert keras' padding to tensorflow's padding. + """Convert keras' padding to TensorFlow's padding. Arguments: padding: string, one of 'same' , 'valid' @@ -3569,7 +3566,7 @@ def _preprocess_padding(padding): elif padding == 'valid': padding = 'VALID' else: - raise ValueError('Invalid padding:', padding) + raise ValueError('Invalid padding: ' + str(padding)) return padding @@ -3600,7 +3597,7 @@ def conv1d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) kernel_shape = kernel.get_shape().as_list() if padding == 'causal': @@ -3652,7 +3649,7 @@ def conv2d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv2d_input(x, data_format) padding = _preprocess_padding(padding) @@ -3699,7 +3696,7 @@ def conv2d_transpose(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) if isinstance(output_shape, (tuple, list)): output_shape = array_ops.stack(output_shape) @@ -3758,16 +3755,18 @@ def separable_conv1d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv1d_input(x, data_format) padding = _preprocess_padding(padding) + if not isinstance(strides, tuple): + strides = tuple(strides) if tf_data_format == 'NHWC': spatial_start_dim = 1 - strides = (1, 1) + strides + (1,) + strides = (1,) + strides * 2 + (1,) else: spatial_start_dim = 2 - strides = (1, 1, 1) + strides + strides = (1, 1) + strides * 2 x = array_ops.expand_dims(x, spatial_start_dim) depthwise_kernel = array_ops.expand_dims(depthwise_kernel, 0) pointwise_kernel = array_ops.expand_dims(pointwise_kernel, 0) @@ -3820,10 +3819,12 @@ def separable_conv2d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv2d_input(x, data_format) padding = _preprocess_padding(padding) + if not isinstance(strides, tuple): + strides = tuple(strides) if tf_data_format == 'NHWC': strides = (1,) + strides + (1,) else: @@ -3869,7 +3870,7 @@ def depthwise_conv2d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv2d_input(x, data_format) padding = _preprocess_padding(padding) @@ -3919,7 +3920,7 @@ def conv3d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv3d_input(x, data_format) padding = _preprocess_padding(padding) @@ -3965,7 +3966,7 @@ def conv3d_transpose(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) if isinstance(output_shape, (tuple, list)): output_shape = array_ops.stack(output_shape) @@ -4024,7 +4025,7 @@ def pool2d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv2d_input(x, data_format) padding = _preprocess_padding(padding) @@ -4042,7 +4043,7 @@ def pool2d(x, x = nn.avg_pool( x, pool_size, strides, padding=padding, data_format=tf_data_format) else: - raise ValueError('Invalid pooling mode:', pool_mode) + raise ValueError('Invalid pooling mode: ' + str(pool_mode)) if data_format == 'channels_first' and tf_data_format == 'NHWC': x = array_ops.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW @@ -4077,7 +4078,7 @@ def pool3d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv3d_input(x, data_format) padding = _preprocess_padding(padding) @@ -4095,7 +4096,7 @@ def pool3d(x, x = nn.avg_pool3d( x, pool_size, strides, padding=padding, data_format=tf_data_format) else: - raise ValueError('Invalid pooling mode:', pool_mode) + raise ValueError('Invalid pooling mode: ' + str(pool_mode)) if data_format == 'channels_first' and tf_data_format == 'NDHWC': x = array_ops.transpose(x, (0, 4, 1, 2, 3)) @@ -4126,7 +4127,7 @@ def local_conv1d(inputs, kernel, kernel_size, strides, data_format=None): if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) stride = strides[0] kernel_shape = int_shape(kernel) @@ -4182,7 +4183,7 @@ def local_conv2d(inputs, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) stride_row, stride_col = strides output_row, output_col = output_shape @@ -4235,7 +4236,7 @@ def bias_add(x, bias, data_format=None): if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) bias_shape = int_shape(bias) if len(bias_shape) != 1 and len(bias_shape) != ndim(x) - 1: raise ValueError( diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py index b267fac7df..dd7436e3d0 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology.py @@ -1154,10 +1154,8 @@ class Network(tf_network.GraphNetwork, Layer): proceed = ask_to_proceed_with_overwrite(filepath) if not proceed: return - f = h5py.File(filepath, 'w') - save_weights_to_hdf5_group(f, self.layers) - f.flush() - f.close() + with h5py.File(filepath, 'w') as f: + save_weights_to_hdf5_group(f, self.layers) def load_weights(self, filepath, by_name=False): """Loads all layer weights from a HDF5 save file. @@ -1184,16 +1182,13 @@ class Network(tf_network.GraphNetwork, Layer): """ if h5py is None: raise ImportError('`load_weights` requires h5py.') - f = h5py.File(filepath, mode='r') - if 'layer_names' not in f.attrs and 'model_weights' in f: - f = f['model_weights'] - if by_name: - load_weights_from_hdf5_group_by_name(f, self.layers) - else: - load_weights_from_hdf5_group(f, self.layers) - - if hasattr(f, 'close'): - f.close() + with h5py.File(filepath, 'r') as f: + if 'layer_names' not in f.attrs and 'model_weights' in f: + f = f['model_weights'] + if by_name: + load_weights_from_hdf5_group_by_name(f, self.layers) + else: + load_weights_from_hdf5_group(f, self.layers) def _updated_config(self): """Util hared between different serialization methods. diff --git a/tensorflow/python/keras/_impl/keras/engine/topology_test.py b/tensorflow/python/keras/_impl/keras/engine/topology_test.py index 0673e42376..28ddc094ee 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology_test.py @@ -555,6 +555,27 @@ class TopologyConstructionTest(test.TestCase): model = keras.models.Model(a, b) self.assertEqual(model.output_mask.get_shape().as_list(), [None, 10]) + def test_activity_regularization_with_model_composition(self): + + def reg(x): + return keras.backend.sum(x) + + net_a_input = keras.Input((2,)) + net_a = net_a_input + net_a = keras.layers.Dense(2, kernel_initializer='ones', + use_bias=False, + activity_regularizer=reg)(net_a) + model_a = keras.Model([net_a_input], [net_a]) + + net_b_input = keras.Input((2,)) + net_b = model_a(net_b_input) + model_b = keras.Model([net_b_input], [net_b]) + + model_b.compile(optimizer='sgd', loss=None) + x = np.ones((1, 2)) + loss = model_b.evaluate(x) + self.assertEqual(loss, 4.) + def test_weight_preprocessing(self): input_dim = 3 output_dim = 3 diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional.py b/tensorflow/python/keras/_impl/keras/layers/convolutional.py index bc43451114..162ae6c28f 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional.py @@ -60,7 +60,7 @@ class Conv1D(tf_convolutional_layers.Conv1D, Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of a single integer, specifying the length of the 1D convolution window. strides: An integer or tuple/list of a single integer, @@ -173,7 +173,7 @@ class Conv2D(tf_convolutional_layers.Conv2D, Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. Can be a single integer to specify the same value for @@ -308,7 +308,7 @@ class Conv3D(tf_convolutional_layers.Conv3D, Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 3 integers, specifying the depth, height and width of the 3D convolution window. Can be a single integer to specify the same value for @@ -877,7 +877,7 @@ class SeparableConv2D(tf_convolutional_layers.SeparableConv2D, Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. Can be a single integer to specify the same value for diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py index 39c9d4f0fb..4a6228121b 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import copy + import numpy as np from tensorflow.python.keras._impl import keras @@ -27,45 +29,39 @@ from tensorflow.python.platform import test class Convolution1DTest(test.TestCase): - def test_dilated_conv1d(self): - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Conv1D, - input_data=np.reshape(np.arange(4, dtype='float32'), (1, 4, 1)), - kwargs={ - 'filters': 1, - 'kernel_size': 2, - 'dilation_rate': 1, - 'padding': 'valid', - 'kernel_initializer': 'ones', - 'use_bias': False, - }, - expected_output=[[[1], [3], [5]]]) - - def test_conv_1d(self): - batch_size = 2 - steps = 8 - input_dim = 2 - kernel_size = 3 - filters = 3 + def _run_test(self, kwargs, arg, values): + num_samples = 2 + stack_size = 3 + length = 7 - for padding in ['valid', 'same']: - for strides in [1, 2]: - if padding == 'same' and strides != 1: - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.Conv1D, + kwargs=test_kwargs, + input_shape=(num_samples, length, stack_size)) - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Conv1D, - kwargs={ - 'filters': filters, - 'kernel_size': kernel_size, - 'padding': padding, - 'strides': strides - }, - input_shape=(batch_size, steps, input_dim)) - - def test_conv_1d_regularizers(self): + def test_conv1d(self): + kwargs = { + 'filters': 2, + 'kernel_size': 3, + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [2]) + self._run_test(kwargs, 'dilation_rate', [2]) + + kwargs = { + 'filters': 2, + 'kernel_size': 3, + 'padding': 'same', + } + self._run_test(kwargs, 'dilation_rate', [2]) + self._run_test(kwargs, 'dilation_rate', [3]) + + def test_conv1d_regularizers(self): kwargs = { 'filters': 3, 'kernel_size': 3, @@ -82,7 +78,7 @@ class Convolution1DTest(test.TestCase): layer(keras.backend.variable(np.ones((1, 5, 2)))) self.assertEqual(len(layer.losses), 3) - def test_conv_1d_constraints(self): + def test_conv1d_constraints(self): k_constraint = lambda x: x b_constraint = lambda x: x @@ -103,35 +99,43 @@ class Convolution1DTest(test.TestCase): class Conv2DTest(test.TestCase): - def test_convolution_2d(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 2 stack_size = 3 - kernel_size = (3, 2) num_row = 7 num_col = 6 - for padding in ['valid', 'same']: - for strides in [(1, 1), (2, 2)]: - if padding == 'same' and strides != (1, 1): - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.SeparableConv2D, + kwargs=test_kwargs, + input_shape=(num_samples, num_row, num_col, stack_size)) - with self.test_session(use_gpu=True): - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. - if test.is_gpu_available(cuda_only=True): - testing_utils.layer_test( - keras.layers.Conv2D, - kwargs={ - 'filters': filters, - 'kernel_size': kernel_size, - 'padding': padding, - 'strides': strides, - 'data_format': 'channels_first' - }, - input_shape=(num_samples, stack_size, num_row, num_col)) - - def test_convolution_2d_regularizers(self): + def test_conv2d(self): + kwargs = { + 'filters': 2, + 'kernel_size': (3, 3), + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [(2, 2)]) + if test.is_gpu_available(cuda_only=True): + # Only runs on GPU with CUDA, channels_first is not supported on CPU. + # TODO(b/62340061): Support channels_first on CPU. + self._run_test(kwargs, 'data_format', ['channels_first']) + self._run_test(kwargs, 'dilation_rate', [(2, 2)]) + + kwargs = { + 'filters': 2, + 'kernel_size': 3, + 'padding': 'same', + } + self._run_test(kwargs, 'dilation_rate', [2]) + + def test_conv2d_regularizers(self): kwargs = { 'filters': 3, 'kernel_size': 3, @@ -148,7 +152,7 @@ class Conv2DTest(test.TestCase): layer(keras.backend.variable(np.ones((1, 5, 5, 2)))) self.assertEqual(len(layer.losses), 3) - def test_convolution_2d_constraints(self): + def test_conv2d_constraints(self): k_constraint = lambda x: x b_constraint = lambda x: x @@ -166,51 +170,34 @@ class Conv2DTest(test.TestCase): self.assertEqual(layer.kernel.constraint, k_constraint) self.assertEqual(layer.bias.constraint, b_constraint) - def test_dilated_conv_2d(self): - num_samples = 2 - filters = 2 - stack_size = 3 - kernel_size = (3, 2) - num_row = 7 - num_col = 6 - - # Test dilation - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Conv2D, - kwargs={ - 'filters': filters, - 'kernel_size': kernel_size, - 'dilation_rate': (2, 2) - }, - input_shape=(num_samples, num_row, num_col, stack_size)) - class Conv2DTransposeTest(test.TestCase): - def test_conv2d_transpose(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 2 stack_size = 3 - num_row = 5 + num_row = 7 num_col = 6 - for padding in ['valid', 'same']: - for strides in [(1, 1), (2, 2)]: - if padding == 'same' and strides != (1, 1): - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.Conv2DTranspose, + kwargs=test_kwargs, + input_shape=(num_samples, num_row, num_col, stack_size)) - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Conv2DTranspose, - kwargs={ - 'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'strides': strides, - 'data_format': 'channels_last' - }, - input_shape=(num_samples, num_row, num_col, stack_size)) + def test_conv2dtranspose(self): + kwargs = { + 'filters': 2, + 'kernel_size': (3, 3), + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [(2, 2)]) + if test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, 'data_format', ['channels_first']) def test_conv2dtranspose_regularizers(self): kwargs = { @@ -250,30 +237,32 @@ class Conv2DTransposeTest(test.TestCase): class Conv3DTransposeTest(test.TestCase): - def test_conv3d_transpose(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 2 stack_size = 3 - num_row = 5 + num_row = 7 num_col = 6 - depth = 4 + depth = 5 - for padding in ['valid', 'same']: - for strides in [(1, 1, 1), (2, 2, 2)]: - if padding == 'same' and strides != (1, 1, 1): - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.Conv3DTranspose, + kwargs=test_kwargs, + input_shape=(num_samples, depth, num_row, num_col, stack_size)) - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Conv3DTranspose, - kwargs={ - 'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'strides': strides, - 'data_format': 'channels_last' - }, - input_shape=(num_samples, depth, num_row, num_col, stack_size)) + def test_conv3dtranspose(self): + kwargs = { + 'filters': 2, + 'kernel_size': (3, 3, 3), + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [(2, 2, 2)]) + if test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, 'data_format', ['channels_first']) def test_conv3dtranspose_regularizers(self): kwargs = { @@ -313,29 +302,37 @@ class Conv3DTransposeTest(test.TestCase): class SeparableConv1DTest(test.TestCase): - def test_separable_conv_1d(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 6 stack_size = 3 length = 7 - strides = 1 - for padding in ['valid', 'same']: - for multiplier in [1, 2]: - if padding == 'same' and strides != 1: - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.SeparableConv1D, + kwargs=test_kwargs, + input_shape=(num_samples, length, stack_size)) - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.SeparableConv1D, - kwargs={ - 'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'strides': strides, - 'depth_multiplier': multiplier - }, - input_shape=(num_samples, length, stack_size)) + def test_separable_conv1d(self): + kwargs = { + 'filters': 2, + 'kernel_size': 3, + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [2]) + self._run_test(kwargs, 'dilation_rate', [2]) + self._run_test(kwargs, 'depth_multiplier', [2]) + + kwargs = { + 'filters': 2, + 'kernel_size': 3, + 'padding': 'same', + } + self._run_test(kwargs, 'dilation_rate', [2]) def test_separable_conv1d_regularizers(self): kwargs = { @@ -379,30 +376,40 @@ class SeparableConv1DTest(test.TestCase): class SeparableConv2DTest(test.TestCase): - def test_separable_conv_2d(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 6 stack_size = 3 num_row = 7 num_col = 6 - for padding in ['valid', 'same']: - for strides in [(1, 1), (2, 2)]: - for multiplier in [1, 2]: - if padding == 'same' and strides != (1, 1): - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.SeparableConv2D, + kwargs=test_kwargs, + input_shape=(num_samples, num_row, num_col, stack_size)) - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.SeparableConv2D, - kwargs={ - 'filters': filters, - 'kernel_size': (3, 3), - 'padding': padding, - 'strides': strides, - 'depth_multiplier': multiplier - }, - input_shape=(num_samples, num_row, num_col, stack_size)) + def test_separable_conv2d(self): + kwargs = { + 'filters': 2, + 'kernel_size': 3, + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [2]) + if test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, 'data_format', ['channels_first']) + self._run_test(kwargs, 'dilation_rate', [2]) + self._run_test(kwargs, 'depth_multiplier', [2]) + + kwargs = { + 'filters': 2, + 'kernel_size': 3, + 'padding': 'same', + } + self._run_test(kwargs, 'dilation_rate', [2]) def test_separable_conv2d_regularizers(self): kwargs = { @@ -446,33 +453,35 @@ class SeparableConv2DTest(test.TestCase): class Conv3DTest(test.TestCase): - def test_convolution_3d(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 2 stack_size = 3 + num_row = 7 + num_col = 6 + depth = 5 - input_len_dim1 = 9 - input_len_dim2 = 8 - input_len_dim3 = 8 + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.Conv3D, + kwargs=test_kwargs, + input_shape=(num_samples, depth, num_row, num_col, stack_size)) - for padding in ['valid', 'same']: - for strides in [(1, 1, 1), (2, 2, 2)]: - if padding == 'same' and strides != (1, 1, 1): - continue + def test_conv3d(self): + kwargs = { + 'filters': 2, + 'kernel_size': (3, 3, 3), + } - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Convolution3D, - kwargs={ - 'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'strides': strides - }, - input_shape=(num_samples, input_len_dim1, input_len_dim2, - input_len_dim3, stack_size)) - - def test_convolution_3d_regularizers(self): + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [(2, 2, 2)]) + self._run_test(kwargs, 'dilation_rate', [(2, 2, 2)]) + if test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, 'data_format', ['channels_first']) + + def test_conv3d_regularizers(self): kwargs = { 'filters': 3, 'kernel_size': 3, @@ -490,7 +499,7 @@ class Conv3DTest(test.TestCase): layer(keras.backend.variable(np.ones((1, 5, 5, 5, 2)))) self.assertEqual(len(layer.losses), 3) - def test_convolution_3d_constraints(self): + def test_conv3d_constraints(self): k_constraint = lambda x: x b_constraint = lambda x: x diff --git a/tensorflow/python/keras/_impl/keras/testing_utils.py b/tensorflow/python/keras/_impl/keras/testing_utils.py index b889e311b3..fa1ee2fa3d 100644 --- a/tensorflow/python/keras/_impl/keras/testing_utils.py +++ b/tensorflow/python/keras/_impl/keras/testing_utils.py @@ -105,8 +105,14 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None, # test in functional API x = keras.layers.Input(shape=input_shape[1:], dtype=input_dtype) y = layer(x) - assert keras.backend.dtype(y) == expected_output_dtype - + if keras.backend.dtype(y) != expected_output_dtype: + raise AssertionError('When testing layer %s, for input %s, found output ' + 'dtype=%s but expected to find %s.\nFull kwargs: %s' % + (layer_cls.__name__, + x, + keras.backend.dtype(y), + expected_output_dtype, + kwargs)) # check shape inference model = keras.models.Model(x, y) expected_output_shape = tuple( @@ -117,7 +123,15 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None, for expected_dim, actual_dim in zip(expected_output_shape, actual_output_shape): if expected_dim is not None: - assert expected_dim == actual_dim + if expected_dim != actual_dim: + raise AssertionError( + 'When testing layer %s, for input %s, found output_shape=' + '%s but expected to find %s.\nFull kwargs: %s' % + (layer_cls.__name__, + x, + actual_output_shape, + expected_output_shape, + kwargs)) if expected_output is not None: np.testing.assert_allclose(actual_output, expected_output, rtol=1e-3) @@ -146,7 +160,15 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None, for expected_dim, actual_dim in zip(expected_output_shape, actual_output_shape): if expected_dim is not None: - assert expected_dim == actual_dim + if expected_dim != actual_dim: + raise AssertionError( + 'When testing layer %s, for input %s, found output_shape=' + '%s but expected to find %s.\nFull kwargs: %s' % + (layer_cls.__name__, + x, + actual_output_shape, + expected_output_shape, + kwargs)) if expected_output is not None: np.testing.assert_allclose(actual_output, expected_output, rtol=1e-3) diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 689046fe78..bb10fe5e8b 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -1096,10 +1096,10 @@ class SeparableConv1D(_SeparableConv): def call(self, inputs): if self.data_format == 'channels_last': - strides = (1, 1) + self.strides + (1,) + strides = (1,) + self.strides * 2 + (1,) spatial_start_dim = 1 else: - strides = (1, 1, 1) + self.strides + strides = (1, 1) + self.strides * 2 spatial_start_dim = 2 # Explicitly broadcast inputs and kernels to 4D. diff --git a/tensorflow/python/layers/network.py b/tensorflow/python/layers/network.py index eeb3276f0c..9f16559687 100644 --- a/tensorflow/python/layers/network.py +++ b/tensorflow/python/layers/network.py @@ -977,7 +977,7 @@ class GraphNetwork(base.Layer): if context.in_graph_mode(): if layer.activity_regularizer is not None: regularization_losses = [ - layer.activity_regularizer(x) for x in computed_tensors + layer.activity_regularizer(x) for x in output_tensors ] # Apply activity regularizer if any: layer.add_loss(regularization_losses, computed_tensors) -- GitLab From 72bd433b9b6b06ae13893015361079dda992d3c8 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Thu, 15 Feb 2018 18:55:22 -0800 Subject: [PATCH 259/629] Add a new tag no_cuda_on_cpu_tap for excluding failing non-gpu cuda tests. PiperOrigin-RevId: 185937687 --- tensorflow/core/debug/BUILD | 5 ++++- tensorflow/core/grappler/clusters/BUILD | 5 ++++- tensorflow/core/kernels/BUILD | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD index a32badef6d..40cb8353cd 100644 --- a/tensorflow/core/debug/BUILD +++ b/tensorflow/core/debug/BUILD @@ -196,7 +196,10 @@ tf_cc_test( srcs = ["debug_gateway_test.cc"], args = ["--heap_check=local"], linkstatic = tf_kernel_tests_linkstatic(), - tags = ["no_gpu"], + tags = [ + "no_cuda_on_cpu_tap", + "no_gpu", + ], deps = [ ":debug", ":debug_gateway_internal", diff --git a/tensorflow/core/grappler/clusters/BUILD b/tensorflow/core/grappler/clusters/BUILD index 5b8ce373bc..b8f8e13c9a 100644 --- a/tensorflow/core/grappler/clusters/BUILD +++ b/tensorflow/core/grappler/clusters/BUILD @@ -114,7 +114,10 @@ tf_cc_test( name = "single_machine_test", srcs = ["single_machine_test.cc"], args = ["--heap_check=local"], # The GPU tracer leaks memory - tags = ["no_gpu"], + tags = [ + "no_cuda_on_cpu_tap", + "no_gpu", + ], deps = [ ":single_machine", "//tensorflow/cc:cc_ops", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index cee3c55d1a..dc93c76eae 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -987,6 +987,7 @@ tf_cuda_cc_test( name = "constant_op_test", size = "small", srcs = ["constant_op_test.cc"], + tags = ["no_cuda_on_cpu_tap"], deps = [ ":constant_op", ":ops_testutil", -- GitLab From 98cf337e781977fd464c574656699b3181eddf19 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Thu, 15 Feb 2018 19:12:05 -0800 Subject: [PATCH 260/629] TFE SPINN example: use tensor instead of numpy array in inference output. PiperOrigin-RevId: 185939805 --- .../contrib/eager/python/examples/spinn/spinn_test.py | 2 +- third_party/examples/eager/spinn/README.md | 4 ++-- third_party/examples/eager/spinn/spinn.py | 7 +++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index eefc06d90d..081b0af14f 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -369,7 +369,7 @@ class SpinnTest(test_util.TensorFlowTestCase): inference_sentences=("( foo ( bar . ) )", "( bar ( foo . ) )")) logits = spinn.train_or_infer_spinn( embed, word2index, None, None, None, config) - self.assertEqual(np.float32, logits.dtype) + self.assertEqual(tf.float32, logits.dtype) self.assertEqual((3,), logits.shape) def testInferSpinnThrowsErrorIfOnlyOneSentenceIsSpecified(self): diff --git a/third_party/examples/eager/spinn/README.md b/third_party/examples/eager/spinn/README.md index 335c0fa3b5..7f477d1920 100644 --- a/third_party/examples/eager/spinn/README.md +++ b/third_party/examples/eager/spinn/README.md @@ -75,7 +75,7 @@ Other eager execution examples can be found under [tensorflow/contrib/eager/pyth should all be separated by spaces. For instance, ```bash - pythons spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs \ + python spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs \ --inference_premise '( ( The dog ) ( ( is running ) . ) )' \ --inference_hypothesis '( ( The dog ) ( moves . ) )' ``` @@ -93,7 +93,7 @@ Other eager execution examples can be found under [tensorflow/contrib/eager/pyth By contrast, the following sentence pair: ```bash - pythons spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs \ + python spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs \ --inference_premise '( ( The dog ) ( ( is running ) . ) )' \ --inference_hypothesis '( ( The dog ) ( rests . ) )' ``` diff --git a/third_party/examples/eager/spinn/spinn.py b/third_party/examples/eager/spinn/spinn.py index 38ba48d501..8a1c7db2ea 100644 --- a/third_party/examples/eager/spinn/spinn.py +++ b/third_party/examples/eager/spinn/spinn.py @@ -44,7 +44,6 @@ import os import sys import time -import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf @@ -567,7 +566,7 @@ def train_or_infer_spinn(embed, Returns: If `config.inference_premise ` and `config.inference_hypothesis` are not `None`, i.e., inference mode: the logits for the possible labels of the - SNLI data set, as numpy array of three floats. + SNLI data set, as a `Tensor` of three floats. else: The trainer object. Raises: @@ -626,8 +625,8 @@ def train_or_infer_spinn(embed, inference_logits = model( # pylint: disable=not-callable tf.constant(prem), tf.constant(prem_trans), tf.constant(hypo), tf.constant(hypo_trans), training=False) - inference_logits = np.array(inference_logits[0][1:]) - max_index = np.argmax(inference_logits) + inference_logits = inference_logits[0][1:] + max_index = tf.argmax(inference_logits) print("\nInference logits:") for i, (label, logit) in enumerate( zip(data.POSSIBLE_LABELS, inference_logits)): -- GitLab From 4de211808b81a2af42a38b011a19ab5ef67795bc Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 15 Feb 2018 19:31:11 -0800 Subject: [PATCH 261/629] Error out when building XLA's CPU and GPU backends with fast-math In an ideal world this won't make a difference since the compiler should be disciplined about not leaking host-level optimization artifacts into generated code. However, I think this provides some defense-in-depth in preventing fast-math optimization on the host side from messing up floating point constants etc. we want to embed into generated code. PiperOrigin-RevId: 185941549 --- tensorflow/compiler/xla/service/llvm_compiler.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc index f98fc0400a..68c35c0c1f 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.cc +++ b/tensorflow/compiler/xla/service/llvm_compiler.cc @@ -15,6 +15,10 @@ limitations under the License. #include "tensorflow/compiler/xla/service/llvm_compiler.h" +#ifdef __FAST_MATH__ +#error "Don't build XLA with -ffast-math" +#endif + namespace xla { StatusOr>> LLVMCompiler::Compile( std::vector> modules, -- GitLab From 480fbfda31e4b1fc10d537264a0c9f1c9c5994f4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 19:34:18 -0800 Subject: [PATCH 262/629] Add tuple targets to the context handling mechanism in templates. PiperOrigin-RevId: 185941851 --- tensorflow/contrib/py2tf/pyct/templates.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/contrib/py2tf/pyct/templates.py b/tensorflow/contrib/py2tf/pyct/templates.py index c40e4d0fb7..6ee6c0c5ce 100644 --- a/tensorflow/contrib/py2tf/pyct/templates.py +++ b/tensorflow/contrib/py2tf/pyct/templates.py @@ -68,6 +68,10 @@ class ReplaceTransformer(gast.NodeTransformer): if isinstance(node, gast.Attribute): self._set_inner_child_context(node.value, ctx) node.ctx = gast.Load() + elif isinstance(node, gast.Tuple): + for e in node.elts: + self._set_inner_child_context(e, ctx) + node.ctx = ctx elif isinstance(node, gast.Name): node.ctx = ctx elif isinstance(node, (gast.Str, gast.Num)): -- GitLab From 5827bdb5bd8f83f0617693bbe2caca253a13c7ed Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 19:47:47 -0800 Subject: [PATCH 263/629] Fix handling of types in RNN state import. Sanitize TF node names. PiperOrigin-RevId: 185942921 --- .../contrib/lite/toco/export_tensorflow.cc | 10 +-- .../contrib/lite/toco/import_tensorflow.cc | 2 +- tensorflow/contrib/lite/toco/model.h | 6 +- tensorflow/contrib/lite/toco/toco_tooling.cc | 8 ++- tensorflow/contrib/lite/toco/tooling_util.cc | 64 +++++++++++++++---- tensorflow/contrib/lite/toco/tooling_util.h | 17 +++++ 6 files changed, 85 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc index 7dc36a6d13..570cc7943b 100644 --- a/tensorflow/contrib/lite/toco/export_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -1236,8 +1236,9 @@ void ConvertLstmCellOperator(const Model& model, const LstmCellOperator& src_op, // Write weights const string weights_output = base + "weights"; CHECK(model.HasArray(src_op.inputs[LstmCellOperator::WEIGHTS_INPUT])); - const auto& weights_array = - model.GetArray(src_op.inputs[LstmCellOperator::WEIGHTS_INPUT]); + const string weights_name = WalkUpToConstantArray( + model, src_op.inputs[LstmCellOperator::WEIGHTS_INPUT]); + const auto& weights_array = model.GetArray(weights_name); // Convert 4D FullyConnected weights into 2D matrix const auto& weights_shape = weights_array.shape(); CHECK_EQ(weights_shape.dimensions_count(), 2); @@ -1262,8 +1263,9 @@ void ConvertLstmCellOperator(const Model& model, const LstmCellOperator& src_op, // Write biases const string biases_output = base + "biases"; CHECK(model.HasArray(src_op.inputs[LstmCellOperator::BIASES_INPUT])); - const auto& bias_array = - model.GetArray(src_op.inputs[LstmCellOperator::BIASES_INPUT]); + const string bias_name = WalkUpToConstantArray( + model, src_op.inputs[LstmCellOperator::BIASES_INPUT]); + const auto& bias_array = model.GetArray(bias_name); // TODO(b/62904716) Bias arrays should be 1-D, and used directly. Shape bias_shape_1d = bias_array.shape(); UnextendShape(&bias_shape_1d, 1); diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 330506200c..9c01b67420 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1582,7 +1582,7 @@ void ConvertFloorDivOperator(const NodeDef& node, void ConvertFloorModOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { - CHECK(node.op() == "FloorMod"); + CHECK_EQ(node.op(), "FloorMod"); CheckInputsCount(node, tf_import_flags, 2); auto* op = new FloorModOperator; op->inputs.push_back(node.input(0)); diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 2bcd6da3da..c55bf664f8 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -160,17 +160,17 @@ enum class AxesOrder { // may be involved only in debug-only subgraphs that we may not be interested // in actually supporting). enum class ArrayDataType { - kNone, + kNone, // 0 kBool, kFloat, kInt8, kUint8, - kInt16, + kInt16, // 5 kUint16, kInt32, kUint32, kInt64, - kUint64, + kUint64, // 10 kString }; diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index 864c646a8c..1b836fbc15 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -189,6 +189,11 @@ std::unique_ptr Import(const TocoFlags& toco_flags, } void Transform(const TocoFlags& toco_flags, Model* model) { + // Clean up after import. + SetFinalDataTypeOnInputs(toco_flags, model); + UseArraysExtraInfo(model); + FinishBuildingRNNStates(model); + const FileFormat output_format = toco_flags.output_format(); const IODataType inference_type = toco_flags.inference_type(); @@ -200,9 +205,6 @@ void Transform(const TocoFlags& toco_flags, Model* model) { << "Quantized inference is not allowed with float inputs."; } - SetFinalDataTypeOnInputs(toco_flags, model); - UseArraysExtraInfo(model); - // Remove unused ops before performing any other optimizations. This is to // stop optimizations from crossing the input/output boundaries. For example // this will stop BatchNorm fusing if the output node is in between a conv diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 249c03ca3c..dcb409c84d 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -25,6 +25,7 @@ limitations under the License. #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/str_replace.h" +#include "absl/strings/str_split.h" #include "tensorflow/contrib/lite/toco/dump_graphviz.h" #include "tensorflow/contrib/lite/toco/model_flags.pb.h" #include "tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h" @@ -633,6 +634,14 @@ bool IsConstantParameterArray(const Model& model, const string& name) { } namespace { +// Take an array name, which may be something like "name:3_5" and make it +// acceptable as a TF node name, say "name_3_5"; +string SanitizeNameForTFNode(const string& array_name) { + auto node_name = array_name; + std::replace(node_name.begin(), node_name.end(), ':', '_'); + return node_name; +} + void CheckInputArraysAreNotOutputArrays(const ModelFlags& model_flags) { for (const auto& input_array : model_flags.input_arrays()) { for (const string& output_array : model_flags.output_arrays()) { @@ -796,7 +805,10 @@ void FixNoOrphanedArray(Model* model) { } } -void CheckArrayFieldsConsistent(const Model& model) { +// Apply checks to arrays individually (for-each fashion). +// +// Check consistency of array fields, check name. +void CheckEachArray(const Model& model) { for (const auto& array_entry : model.GetArrayMap()) { const auto& array = array_entry.second; if (array->has_shape()) { @@ -811,6 +823,18 @@ void CheckArrayFieldsConsistent(const Model& model) { if (array->buffer) { CHECK(array->buffer->type == array->data_type); } + + // Check name. Either "name_with_suffix_8", "name_with_port:3", but not + // "name_with_both:3_8". + const string& name = array_entry.first; + auto colon_pos = name.find_first_of(":"); + if (colon_pos != string::npos) { + CHECK_EQ(name.substr(colon_pos + 1).find_first_not_of("0123456789"), + string::npos) + << "Array name must only have digits after colon"; + } + CHECK_GT(colon_pos, 0) + << "First character of array name must not be a colon."; } } @@ -959,7 +983,7 @@ void CheckInvariants(const Model& model) { CheckNonAsciiIOArrays(model.flags); CheckNoMissingArray(model); CheckNoOrphanedArray(model); - CheckArrayFieldsConsistent(model); + CheckEachArray(model); CheckOperatorOrdering(model); } @@ -1051,9 +1075,6 @@ void CreateOrCheckRnnStateArray(const string& name, int size, Model* model) { if (array.has_shape()) { num_dims = array.shape().dimensions_count(); } - CHECK(array.data_type == ArrayDataType::kFloat || - array.data_type == ArrayDataType::kNone); - array.data_type = ArrayDataType::kFloat; if (!array.has_shape() && num_dims >= 0) { Shape* shape = array.mutable_shape(); std::vector dims; @@ -1077,7 +1098,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { } } if (!dst_input_array) { - // specified_input_array from model_flags is not found in model->flags. + // Specified_input_array from model_flags is not found in model->flags. // Match a name-less specified input array when there can be no ambiguity // as there is only 1 input array. if (model->flags.input_arrays_size() == 1 && @@ -1384,19 +1405,23 @@ bool IsAllocatableTransientArray(const Model& model, const string& array_name) { } string AvailableArrayName(const Model& model, const string& name) { - if (!model.HasArray(name) && !model.IsOptionalArray(name)) { - return name; + string sanitized_name = SanitizeNameForTFNode(name); + if (!model.HasArray(sanitized_name) && + !model.IsOptionalArray(sanitized_name)) { + return sanitized_name; } const int kNumSuffixesToTry = 1000; for (int i = 0; i < kNumSuffixesToTry; i++) { - const string& name_with_suffix = toco::port::StringF("%s_%d", name, i); + const string& name_with_suffix = + toco::port::StringF("%s_%d", sanitized_name, i); if (!model.HasArray(name_with_suffix) && !model.IsOptionalArray(name_with_suffix)) { return name_with_suffix; } } - LOG(FATAL) << "Could not find an available array name starting with " << name - << ". Tried " << kNumSuffixesToTry << " suffixes, all were taken!"; + LOG(FATAL) << "Could not find an available array name starting with " + << sanitized_name << ". Tried " << kNumSuffixesToTry + << " suffixes, all were taken!"; return ""; } @@ -1795,6 +1820,23 @@ ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type) { } } +void FinishBuildingRNNStates(Model* model) { + for (const auto& rnn_state : model->flags.rnn_states()) { + if (!model->HasArray(rnn_state.back_edge_source_array()) || + !model->HasArray(rnn_state.state_array())) { + CHECK(model->HasArray(rnn_state.back_edge_source_array())); + CHECK(model->HasArray(rnn_state.state_array())); + continue; + } + const auto& src_array = model->GetArray(rnn_state.back_edge_source_array()); + auto& dst_array = model->GetArray(rnn_state.state_array()); + if (src_array.data_type == ArrayDataType::kNone && + dst_array.data_type == ArrayDataType::kNone) { + dst_array.data_type = ArrayDataType::kFloat; + } + } +} + void UseArraysExtraInfo(Model* model) { for (const auto& entry : model->flags.arrays_extra_info().entries()) { QCHECK(model->HasArray(entry.name())) diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index a2dde09156..0aaa0f6a21 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -299,6 +299,23 @@ void CheckFinalDataTypesSatisfied(const Model& model); ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type); +// The process of building models varies according to the import format. +// +// (a) In some cases, such as model-proto format, the model should be fully +// specified. In these cases, no extra action should be taken by this function. +// (b) In other cases, such as TF graphdef format, the desired types of RNN +// arrays are not specified directly in the model, neither can they be inferred. +// However, we can set the types of RNN destination arrays to float. This breaks +// any cycles such as when resolution of the type of an RNN source array depends +// on the type of its destination array. +// +// This function is applied after the main import, after resolution of flags and +// after application of ArraysExtraInfo. It only defaults destination RNN arrays +// to float. If the model is subsequently quantized, it is assumed that the +// model contains sufficient information for that to be completed. If it is +// already quantized, then case (a) should hold. +void FinishBuildingRNNStates(Model* model); + void UseArraysExtraInfo(Model* model); } // namespace toco -- GitLab From 018980f7aa02d023ad4574fcb92b8be7ff3cfbbb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 19:52:03 -0800 Subject: [PATCH 264/629] optimized quantized softmax PiperOrigin-RevId: 185943132 --- .../internal/optimized/optimized_ops.h | 237 +++++++++++++----- 1 file changed, 178 insertions(+), 59 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index cd52385f41..7af07e5d0c 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -2866,74 +2866,193 @@ inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, using FixedPointAccum = gemmlowp::FixedPoint; using FixedPoint0 = gemmlowp::FixedPoint; - gemmlowp::ScopedProfilingLabel label("Softmax"); + gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); const int height = MatchingArraySize(input_dims, 2, output_dims, 2); const int width = MatchingArraySize(input_dims, 1, output_dims, 1); const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int x = 0; x < width; ++x) { - for (int y = 0; y < height; ++y) { - uint8 max_in_row = 0; - for (int c = 0; c < depth; ++c) { - max_in_row = - std::max(max_in_row, input_data[Offset(input_dims, c, x, y, b)]); - } + const int outer_size = batches * height * width; - FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); - for (int c = 0; c < depth; ++c) { - int32 input_diff = - static_cast(input_data[Offset(input_dims, c, x, y, b)]) - - max_in_row; - if (input_diff >= diff_min) { - const int32 input_diff_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_beta_multiplier, input_beta_left_shift); - const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); - sum_of_exps = - sum_of_exps + gemmlowp::Rescale( - exp_on_negative_values(scaled_diff_f8)); - } + for (int b = 0; b < outer_size; ++b) { + const uint8* input_data_ptr = input_data + b * depth; + uint8* output_data_ptr = output_data + b * depth; + + // Determine the largest entry in the current row + uint8 max_in_row = 0; + { + int c = 0; +#ifdef USE_NEON + uint8x16_t max16_0 = vdupq_n_u8(0); + uint8x16_t max16_1 = vdupq_n_u8(0); + for (; c <= depth - 32; c += 32) { + max16_0 = vmaxq_u8(max16_0, vld1q_u8(input_data_ptr + c + 0)); + max16_1 = vmaxq_u8(max16_1, vld1q_u8(input_data_ptr + c + 16)); + } + uint8x16_t max16 = vmaxq_u8(max16_0, max16_1); + if (c <= depth - 16) { + max16 = vmaxq_u8(max16, vld1q_u8(input_data_ptr + c)); + c += 16; + } + uint8x8_t max8 = vmax_u8(vget_low_u8(max16), vget_high_u8(max16)); + if (c <= depth - 8) { + max8 = vmax_u8(max8, vld1_u8(input_data_ptr + c)); + c += 8; + } + uint8x8_t max4 = vmax_u8(max8, vext_u8(max8, max8, 4)); + uint8x8_t max2 = vmax_u8(max4, vext_u8(max4, max4, 2)); + uint8x8_t max1 = vpmax_u8(max2, max2); + max_in_row = vget_lane_u8(max1, 0); +#endif + for (; c < depth; ++c) { + max_in_row = std::max(max_in_row, input_data_ptr[c]); + } + } + +#ifdef USE_NEON + using FixedPointAccumInt32x4 = + gemmlowp::FixedPoint; + using FixedPointScaledDiffInt32x4 = + gemmlowp::FixedPoint; + using FixedPoint0Int32x4 = gemmlowp::FixedPoint; + FixedPoint0Int32x4 input_beta_multiplier_f0 = + FixedPoint0Int32x4::FromScalarRaw(input_beta_multiplier); + int16x8_t max_in_row_s16 = vdupq_n_s16(max_in_row); +#endif + + // Compute the sum of exponentials of the differences of entries in the + // current row from the largest entry in the current row. + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + { + int c = 0; +#ifdef USE_NEON + int32x4_t diff_min_s32 = vdupq_n_s32(diff_min); + FixedPointAccumInt32x4 sum_of_exps_0 = FixedPointAccumInt32x4::Zero(); + FixedPointAccumInt32x4 sum_of_exps_1 = FixedPointAccumInt32x4::Zero(); + FixedPointAccumInt32x4 zeros = FixedPointAccumInt32x4::Zero(); + for (; c <= depth - 8; c += 8) { + uint16x8_t input_u16 = vmovl_u8(vld1_u8(input_data_ptr + c)); + int16x8_t input_diff_s16 = + vsubq_s16(vreinterpretq_s16_u16(input_u16), max_in_row_s16); + int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); + int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); + int32x4_t mask_0 = vcgeq_s32(input_diff_s32_0, diff_min_s32); + int32x4_t mask_1 = vcgeq_s32(input_diff_s32_1, diff_min_s32); + FixedPointScaledDiffInt32x4 scaled_diff_0 = + input_beta_multiplier_f0 * + FixedPointScaledDiffInt32x4::FromRaw( + gemmlowp::ShiftLeft(input_diff_s32_0, input_beta_left_shift)); + FixedPointScaledDiffInt32x4 scaled_diff_1 = + input_beta_multiplier_f0 * + FixedPointScaledDiffInt32x4::FromRaw( + gemmlowp::ShiftLeft(input_diff_s32_1, input_beta_left_shift)); + FixedPointAccumInt32x4 exps_0 = + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_0)); + FixedPointAccumInt32x4 exps_1 = + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_1)); + FixedPointAccumInt32x4 masked_exps_0 = + SelectUsingMask(mask_0, exps_0, zeros); + FixedPointAccumInt32x4 masked_exps_1 = + SelectUsingMask(mask_1, exps_1, zeros); + sum_of_exps_0 = sum_of_exps_0 + masked_exps_0; + sum_of_exps_1 = sum_of_exps_1 + masked_exps_1; + } + int32x4_t sum_of_exps_reduced_4 = (sum_of_exps_0 + sum_of_exps_1).raw(); + int32x2_t sum_of_exps_reduced_2 = + vadd_s32(vget_low_s32(sum_of_exps_reduced_4), + vget_high_s32(sum_of_exps_reduced_4)); + int32x2_t sum_of_exps_reduced_1 = + vpadd_s32(sum_of_exps_reduced_2, sum_of_exps_reduced_2); + sum_of_exps = + FixedPointAccum::FromRaw(vget_lane_s32(sum_of_exps_reduced_1, 0)); +#endif + for (; c < depth; ++c) { + int32 input_diff = static_cast(input_data_ptr[c]) - max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = + sum_of_exps + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_f8)); } + } + } - int32 fixed_sum_of_exps = sum_of_exps.raw(); - // TODO(starka): Use a NEON intrinsic like vclzq_u32 instead. - int headroom_plus_one = - __builtin_clz(static_cast(fixed_sum_of_exps)); - // This is the number of bits to the left of the binary point above 1.0. - // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and - // no later adjustment will be needed. - int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; - int32 shifted_sum_minus_one = static_cast( - (static_cast(fixed_sum_of_exps) << headroom_plus_one) - - (static_cast(1) << 31)); - - FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1( - FixedPoint0::FromRaw(shifted_sum_minus_one)); + // Compute the fixed-point multiplier and shift that we need to apply to + // perform a division by the above-computed sum-of-exponentials. + int32 fixed_sum_of_exps = sum_of_exps.raw(); + int headroom_plus_one = + __builtin_clz(static_cast(fixed_sum_of_exps)); + // This is the number of bits to the left of the binary point above 1.0. + // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and + // no later adjustment will be needed. + int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; + int32 shifted_sum_minus_one = static_cast( + (static_cast(fixed_sum_of_exps) << headroom_plus_one) - + (static_cast(1) << 31)); + FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1( + FixedPoint0::FromRaw(shifted_sum_minus_one)); + + // Compute the quotients of exponentials of differences of entries in the + // current row from the largest entry, over the previously-computed sum of + // exponentials. + { + int c = 0; +#ifdef USE_NEON + int16x8_t diff_min_s16 = vdupq_n_s16(diff_min); + for (; c <= depth - 8; c += 8) { + uint16x8_t input_u16 = vmovl_u8(vld1_u8(input_data_ptr + c)); + int16x8_t input_diff_s16 = + vsubq_s16(vreinterpretq_s16_u16(input_u16), max_in_row_s16); + int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); + int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); + uint8x8_t mask = vmovn_u16( + vreinterpretq_u16_s16(vcgeq_s16(input_diff_s16, diff_min_s16))); + FixedPointScaledDiffInt32x4 scaled_diff_0 = + input_beta_multiplier_f0 * + FixedPointScaledDiffInt32x4::FromRaw( + gemmlowp::ShiftLeft(input_diff_s32_0, input_beta_left_shift)); + FixedPointScaledDiffInt32x4 scaled_diff_1 = + input_beta_multiplier_f0 * + FixedPointScaledDiffInt32x4::FromRaw( + gemmlowp::ShiftLeft(input_diff_s32_1, input_beta_left_shift)); + FixedPoint0Int32x4 exp_0 = exp_on_negative_values(scaled_diff_0); + FixedPoint0Int32x4 exp_1 = exp_on_negative_values(scaled_diff_1); + int32x4_t output_s32_0 = gemmlowp::RoundingDivideByPOT( + vqrdmulhq_n_s32(exp_0.raw(), shifted_scale.raw()), + num_bits_over_unit + 31 - 8); + int32x4_t output_s32_1 = gemmlowp::RoundingDivideByPOT( + vqrdmulhq_n_s32(exp_1.raw(), shifted_scale.raw()), + num_bits_over_unit + 31 - 8); + int16x8_t output_s16 = + vcombine_s16(vqmovn_s32(output_s32_0), vqmovn_s32(output_s32_1)); + uint8x8_t output_u8 = vqmovun_s16(output_s16); + uint8x8_t masked_output = vbsl_s16(mask, output_u8, vdup_n_u8(0)); + vst1_u8(output_data_ptr + c, masked_output); + } +#endif + for (; c < depth; ++c) { + int32 input_diff = static_cast(input_data_ptr[c]) - max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); - for (int c = 0; c < depth; ++c) { - int32 input_diff = - static_cast(input_data[Offset(input_dims, c, x, y, b)]) - - max_in_row; - if (input_diff >= diff_min) { - const int32 input_diff_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_beta_multiplier, input_beta_left_shift); - const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); - - FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); - int32 unsat_output = gemmlowp::RoundingDivideByPOT( - (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8); - - output_data[Offset(output_dims, c, x, y, b)] = - std::max(std::min(unsat_output, 255), 0); - - } else { - output_data[Offset(output_dims, c, x, y, b)] = 0; - } + FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); + int32 unsat_output = gemmlowp::RoundingDivideByPOT( + (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8); + + output_data_ptr[c] = std::max(std::min(unsat_output, 255), 0); + + } else { + output_data_ptr[c] = 0; } } } -- GitLab From cae5adce103849287e48a122b203d71600c7a6ff Mon Sep 17 00:00:00 2001 From: Alina Sbirlea Date: Thu, 15 Feb 2018 20:18:11 -0800 Subject: [PATCH 265/629] Automated g4 rollback of changelist 185891869 PiperOrigin-RevId: 185944719 --- .../xla/service/algebraic_simplifier.cc | 141 ---------- .../xla/service/algebraic_simplifier_test.cc | 203 --------------- .../compiler/xla/tests/dot_operation_test.cc | 246 ------------------ 3 files changed, 590 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 6f6c2391f3..fb857559f9 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -284,8 +284,6 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { const Shape& dot_shape, HloInstruction* lhs, int64 lhs_contracting_dim, HloInstruction* rhs, int64 rhs_contracting_dim, bool swapped); - StatusOr OptimizeDotOfGather(HloInstruction* dot); - // Current HloComputation instance the AlgebraicSimplifierVisitor is // traversing. HloComputation* computation_; @@ -919,134 +917,6 @@ StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfConcatHelper( return add_result; } -StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfGather( - HloInstruction* dot) { - const DotDimensionNumbers& dnums = dot->dot_dimension_numbers(); - if (dnums.lhs_contracting_dimensions_size() != 1 || - dnums.rhs_contracting_dimensions_size() != 1 || - dnums.lhs_batch_dimensions_size() != 0 || - dnums.rhs_batch_dimensions_size() != 0 || - dot->shape().dimensions_size() != 2) { // dot output 2D - VLOG(10) << "DotOfGather: Can only optimize 2D, non-batch dot operations."; - return nullptr; - } - - // Optimize either dot(DS(ctA), ctB)) or dot(ctB, DS(ctA)). - // Currently a Gather is a DynamicSlice. - auto is_dynamic_slice_constant_combination = - [](HloInstruction* a, HloInstruction* b, int a_contracting_dimension) { - // First operand is a DynamicSlice(Constant). - if (a->opcode() != HloOpcode::kDynamicSlice) { - return false; - } - auto* dynamic_slice_op = a->operand(0); - if (dynamic_slice_op->opcode() != HloOpcode::kConstant) { - return false; - } - // Second operand is a Constant. - if (b->opcode() != HloOpcode::kConstant) { - return false; - } - // The DynamicSlice output is a vector. - const Shape& dynamic_slice_shape = a->shape(); - if (dynamic_slice_shape.dimensions(1 - a_contracting_dimension) != 1) { - return false; - } - // Constant size is the same before and after slice in the contracting - // dimension, otherwise we either must precompute for all possible slice - // indices or dot is invalid. - const Shape& dynamic_slice_op_shape = dynamic_slice_op->shape(); - if (dynamic_slice_op_shape.dimensions(a_contracting_dimension) != - dynamic_slice_shape.dimensions(a_contracting_dimension)) { - return false; - } - return true; - }; - - HloInstruction* lhs = dot->mutable_operand(0); - HloInstruction* rhs = dot->mutable_operand(1); - int lhs_contracting_dimension = dnums.lhs_contracting_dimensions(0); - int rhs_contracting_dimension = dnums.rhs_contracting_dimensions(0); - - if (!is_dynamic_slice_constant_combination( - lhs, rhs, /*a_contracting_dimension=*/lhs_contracting_dimension) && - !is_dynamic_slice_constant_combination( - rhs, lhs, /*a_contracting_dimension=*/rhs_contracting_dimension)) { - VLOG(10) << "DotOfGather: Can only optimize dot(DS(ctA), ctB)) or " - "dot(ctB, DS(ctA)), where the two constants have equal " - "contracting dimensions."; - return nullptr; - } - - // LHS is DynamicSlice: - // input: dot(DS(ctA), ctB)) - // where DS(ctA) = DS({M x K}, {start, 0}, {1, K}) and ctB = {K x N}. - // => input dimensions: dot({1 x K}, {K x N}) => {1 x N}. - // output: DS(dot(ctA, ctB)) - // => output dimensions: DS ({M x N}, {start, 0}, {1, N}) => {1 x N}. - - // RHS is DynamicSlice: - // input: dot(ctA, DS(ctB)) - // where ctA = {M x K} and DS(ctB) = DS({K x N}, {0, start}, {K, 1}). - // => input dimensions: dot({M x K}, {K x 1}) => {M x 1}. - // output: DS(dot(ctA, ctB)) - // => output dimensions: DS ({M x N}, {0, start}, {M, 1}) => {M x 1}. - - bool lhs_is_dynamic_slice = lhs->opcode() == HloOpcode::kDynamicSlice; - - // ctA: - HloInstruction* left_operand = - lhs_is_dynamic_slice ? lhs->mutable_operand(0) : lhs; - // ctB: - HloInstruction* right_operand = - lhs_is_dynamic_slice ? rhs : rhs->mutable_operand(0); - // Build ctA x ctB. - const int m = left_operand->shape().dimensions(1 - lhs_contracting_dimension); - const int n = - right_operand->shape().dimensions(1 - rhs_contracting_dimension); - auto memoized_shape = ShapeUtil::MakeShape(F32, {m, n}); - auto* memoized_inst = computation_->AddInstruction(HloInstruction::CreateDot( - memoized_shape, left_operand, right_operand, dnums)); - // Get pair {start, 0} or {0, start}. - HloInstruction* original_start_indices = - lhs_is_dynamic_slice ? lhs->mutable_operand(1) : rhs->mutable_operand(1); - // Position of start: - int index_of_non_zero_start = lhs_is_dynamic_slice - ? 1 - lhs_contracting_dimension - : 1 - rhs_contracting_dimension; - // Position of zero: - int index_of_zero_start = 1 - index_of_non_zero_start; - - // Slice out start and 0 components and reorder if necessary. - auto indices_type = original_start_indices->shape().element_type(); - Shape s_shape = ShapeUtil::MakeShape(indices_type, {1}); - Shape d_shape = ShapeUtil::MakeShape(indices_type, {2}); - HloInstruction* non_zero_start = - computation_->AddInstruction(HloInstruction::CreateSlice( - s_shape, original_start_indices, {index_of_non_zero_start}, - {index_of_non_zero_start + 1}, {1})); - HloInstruction* zero_start = - computation_->AddInstruction(HloInstruction::CreateSlice( - s_shape, original_start_indices, {index_of_zero_start}, - {index_of_zero_start + 1}, {1})); - HloInstruction* new_start_indices = - lhs_is_dynamic_slice - ? computation_->AddInstruction(HloInstruction::CreateConcatenate( - d_shape, {non_zero_start, zero_start}, 0)) - : computation_->AddInstruction(HloInstruction::CreateConcatenate( - d_shape, {zero_start, non_zero_start}, 0)); - - // Build DynamicSlice(ctA x ctB). - const int new_slice_m = lhs_is_dynamic_slice ? 1 : m; - const int new_slice_n = lhs_is_dynamic_slice ? n : 1; - auto* memoized_lookup = - computation_->AddInstruction(HloInstruction::CreateDynamicSlice( - dot->shape(), memoized_inst, new_start_indices, - {new_slice_m, new_slice_n})); - - return memoized_lookup; -} - Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { auto lhs = dot->mutable_operand(0); auto rhs = dot->mutable_operand(1); @@ -1076,17 +946,6 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { return ReplaceInstruction(dot, dot_of_concat_optimized); } - // Simplify dot(ConstA, Gather(Index, ConstB)) to: - // Gather(Index, dot*(ConstA, ConstB)), where dot* is an appropriately - // batched version of dot. - TF_ASSIGN_OR_RETURN(HloInstruction * dot_of_gather_optimized, - OptimizeDotOfGather(dot)); - if (dot_of_gather_optimized) { - VLOG(10) << "Replaced dot(constA, gather(i, constB)) with " - "gather(i, dot*(constA, constB))"; - return ReplaceInstruction(dot, dot_of_gather_optimized); - } - if (enable_dot_strength_reduction_ && !is_layout_sensitive_) { TF_ASSIGN_OR_RETURN(bool did_strength_reduction, HandleDotStrengthReduction(dot)); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index fc78420147..0f08eb3a32 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2772,208 +2772,5 @@ DotOfConcatTestSpec kDotOfConcatTestSpecs[] = { INSTANTIATE_TEST_CASE_P(DotOfConcatSimplificationTestInstantiation, DotOfConcatSimplificationTest, ::testing::ValuesIn(kDotOfConcatTestSpecs)); - -struct DotOfGatherTestSpec { - int64 m; - int64 k; - int64 n; - int s; // start index for dynamic slice on the non-contracting dimension - int64 lcd; // left contracting dimension - int64 rcd; // right contracting dimension - bool neg; // is negative testcase -}; - -class DotOfGatherSimplificationTest - : public HloVerifiedTestBase, - public ::testing::WithParamInterface {}; - -// input: dot(DS(ctA), ctB)) -// where DS(ctA) = DS({M x K}, {s, 0}, {1, K}) and ctB = {K x N}. -// => input dimensions: dot({1 x K}, {K x N}) => {1 x N}. -// output: DS(dot(ctA, ctB)) -// => output dimensions: DS ({M x N}, {s, 0}, {1, N}) => {1 x N}. -TEST_P(DotOfGatherSimplificationTest, ConstantRHS) { - HloComputation::Builder builder(TestName()); - - DotOfGatherTestSpec spec = GetParam(); - - ASSERT_LE(spec.s, spec.m); - - // For negative tests, increase k of the dynamic slice argument to prevent the - // optimization (constants ctA, ctB must have equal contracting dimensions). - int64 k_increase = spec.neg ? 5 : 0; - int64 lhs_rows = (spec.lcd == 0) ? (spec.k + k_increase) : spec.m; - int64 lhs_cols = (spec.lcd == 0) ? spec.m : (spec.k + k_increase); - Shape lhs_shape = ShapeUtil::MakeShape(F32, {lhs_rows, lhs_cols}); - auto* lhs = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( - /*from=*/10.0, /*to=*/10000.0, /*rows=*/lhs_rows, - /*cols=*/lhs_cols))); - - int32 start_row = (spec.lcd == 0) ? 0 : spec.s; - int32 start_col = (spec.lcd == 0) ? spec.s : 0; - const auto start_indices = - builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR1({start_row, start_col}))); - int64 slice_row_size = (spec.lcd == 0) ? spec.k : 1; - int64 slice_col_size = (spec.lcd == 0) ? 1 : spec.k; - Shape ds_shape = ShapeUtil::MakeShape(F32, {slice_row_size, slice_col_size}); - auto* ds = builder.AddInstruction(HloInstruction::CreateDynamicSlice( - ds_shape, lhs, start_indices, {slice_row_size, slice_col_size})); - - int64 rhs_rows = (spec.rcd == 0) ? spec.k : spec.n; - int64 rhs_cols = (spec.rcd == 0) ? spec.n : spec.k; - Shape rhs_shape = ShapeUtil::MakeShape(F32, {rhs_rows, rhs_cols}); - auto* rhs = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( - /*from=*/10.0, /*to=*/10000.0, /*rows=*/rhs_rows, - /*cols=*/rhs_cols))); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(spec.lcd); - dot_dnums.add_rhs_contracting_dimensions(spec.rcd); - - int64 dot_row_size = 1; - int64 dot_col_size = spec.n; - Shape dot_shape = ShapeUtil::MakeShape(F32, {dot_row_size, dot_col_size}); - builder.AddInstruction( - HloInstruction::CreateDot(dot_shape, ds, rhs, dot_dnums)); - - auto computation = module().AddEntryComputation(builder.Build()); - AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, - non_bitcasting_callback()); - TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(&module())); - ASSERT_TRUE(run_successful); - EXPECT_TRUE( - ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape)); - - if (spec.neg) { - EXPECT_NE(computation->root_instruction()->opcode(), - HloOpcode::kDynamicSlice); - } else { - EXPECT_THAT(computation->root_instruction(), - op::DynamicSlice(op::Dot(op::Constant(), op::Constant()), - op::Concatenate())); - } -} - -// input: dot(ctA, DS(ctB)) -// where ctA = {M x K} and DS(ctB) = DS({K x N}, {0, s}, {K, 1}). -// => input dimensions: dot({M x K}, {K x 1}) => {M x 1}. -// output: DS(dot(ctA, ctB)) -// => output dimensions: DS ({M x N}, {0, s}, {M, 1}) => {M x 1}. -TEST_P(DotOfGatherSimplificationTest, ConstantLHS) { - HloComputation::Builder builder(TestName()); - - DotOfGatherTestSpec spec = GetParam(); - - ASSERT_LE(spec.s, spec.n); - - int64 lhs_rows = (spec.lcd == 0) ? spec.k : spec.m; - int64 lhs_cols = (spec.lcd == 0) ? spec.m : spec.k; - Shape lhs_shape = ShapeUtil::MakeShape(F32, {lhs_rows, lhs_cols}); - auto* lhs = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( - /*from=*/10.0, /*to=*/10000.0, /*rows=*/lhs_rows, - /*cols=*/lhs_cols))); - - // For negative tests increase k of the dynamic slice argument to prevent the - // optimization - int64 k_increase = spec.neg ? 5 : 0; - int64 rhs_rows = (spec.rcd == 0) ? (spec.k + k_increase) : spec.n; - int64 rhs_cols = (spec.rcd == 0) ? spec.n : (spec.k + k_increase); - Shape rhs_shape = ShapeUtil::MakeShape(F32, {rhs_rows, rhs_cols}); - auto* rhs = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( - /*from=*/10.0, /*to=*/10000.0, /*rows=*/rhs_rows, - /*cols=*/rhs_cols))); - - int32 start_row = (spec.rcd == 0) ? 0 : spec.s; - int32 start_col = (spec.rcd == 0) ? spec.s : 0; - const auto start_indices = - builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR1({start_row, start_col}))); - int64 slice_row_size = (spec.rcd == 0) ? spec.k : 1; - int64 slice_col_size = (spec.rcd == 0) ? 1 : spec.k; - Shape ds_shape = ShapeUtil::MakeShape(F32, {slice_row_size, slice_col_size}); - auto* ds = builder.AddInstruction(HloInstruction::CreateDynamicSlice( - ds_shape, rhs, start_indices, {slice_row_size, slice_col_size})); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(spec.lcd); - dot_dnums.add_rhs_contracting_dimensions(spec.rcd); - - int64 dot_row_size = spec.m; - int64 dot_col_size = 1; - Shape dot_shape = ShapeUtil::MakeShape(F32, {dot_row_size, dot_col_size}); - builder.AddInstruction( - HloInstruction::CreateDot(dot_shape, lhs, ds, dot_dnums)); - - auto computation = module().AddEntryComputation(builder.Build()); - AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, - non_bitcasting_callback()); - TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(&module())); - ASSERT_TRUE(run_successful); - EXPECT_TRUE( - ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape)); - - if (spec.neg) { - EXPECT_NE(computation->root_instruction()->opcode(), - HloOpcode::kDynamicSlice); - } else { - EXPECT_THAT(computation->root_instruction(), - op::DynamicSlice(op::Dot(op::Constant(), op::Constant()), - op::Concatenate())); - } -} - -std::vector DotOfGatherPositiveNegativeTests() { - std::vector positives = { - // "Classical dot", i.e. matrix multiply: - {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/1, /*rcd=*/0, - /*neg=*/false}, - {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/1, /*rcd=*/0, - /*neg=*/false}, - {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/1, /*rcd=*/0, - /*neg=*/false}, - // Note: testing for m=1 and n=1 is unnecessary, as this optimizes to - // dot(ct, ct) before DotOfGather optimization kicks in. - // Contract on rows: - {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/0, /*rcd=*/0, - /*neg=*/false}, - {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/0, /*rcd=*/0, - /*neg=*/false}, - {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/0, /*rcd=*/0, - /*neg=*/false}, - // Reverse matrix multiply: - {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/0, /*rcd=*/1, - /*neg=*/false}, - {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/0, /*rcd=*/1, - /*neg=*/false}, - {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/0, /*rcd=*/1, - /*neg=*/false}, - // Contract on columns: - {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/1, /*rcd=*/1, - /*neg=*/false}, - {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/1, /*rcd=*/1, - /*neg=*/false}, - {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/1, /*rcd=*/1, - /*neg=*/false}, - }; - std::vector all; - for (int i = 0; i < positives.size(); i++) { - DotOfGatherTestSpec positive_test = positives[i]; - all.push_back(positive_test); - DotOfGatherTestSpec negative_test = positive_test; - negative_test.neg = true; - all.push_back(negative_test); - } - return all; -} - -INSTANTIATE_TEST_CASE_P( - DotOfGatherSimplificationTestInstantiation, DotOfGatherSimplificationTest, - ::testing::ValuesIn(DotOfGatherPositiveNegativeTests())); - } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 63354d4b30..6b0c04c2c0 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -703,251 +703,5 @@ TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstRHS) { &builder, expected, {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_); } - -TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { - std::unique_ptr> constant_lhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - // Dot result to slice from: {{114, 105, 96}, {96, 105, 114}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({1, 0}); - auto dynamic_slice = - builder.DynamicSlice(lhs_constant, start_constant, {1, 6}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(0); - auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); - - Array2D expected({{96.0, 105.0, 114.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { - std::unique_ptr> constant_lhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - // Dot result to slice from: {{114, 105, 96}, {96, 105, 114}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({0, 1}); - auto dynamic_slice = - builder.DynamicSlice(rhs_constant, start_constant, {6, 1}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(0); - auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); - - Array2D expected({{105.0}, {105.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( - DotOfGatherOptimizationWithConstRHSReverseMM)))) { - std::unique_ptr> constant_lhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - // Dot result to slice from: {{114, 96}, {105, 105}, {96, 114}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({0, 1}); - auto dynamic_slice = - builder.DynamicSlice(lhs_constant, start_constant, {6, 1}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(0); - dot_dnums.add_rhs_contracting_dimensions(1); - auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); - - Array2D expected({{105.0, 105.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( - DotOfGatherOptimizationWithConstLHSReverseMM)))) { - std::unique_ptr> constant_lhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - // Dot result to slice from: {{114, 96}, {105, 105}, {96, 114}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({1, 0}); - auto dynamic_slice = - builder.DynamicSlice(rhs_constant, start_constant, {1, 6}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(0); - dot_dnums.add_rhs_contracting_dimensions(1); - auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); - - Array2D expected({{96.0}, {105.0}, {114.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU( - DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSRows)))) { - std::unique_ptr> constant_lhs_array( - new Array2D({{1.0, 2.0}, - {3.0, 4.0}, - {5.0, 6.0}, - {6.0, 5.0}, - {4.0, 3.0}, - {2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - // Dot result to slice from: {{132, 129, 126}, {126, 129, 132}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({0, 1}); - auto dynamic_slice = - builder.DynamicSlice(lhs_constant, start_constant, {6, 1}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(0); - dot_dnums.add_rhs_contracting_dimensions(0); - auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); - - Array2D expected({{126.0, 129.0, 132.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU( - DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSRows)))) { - std::unique_ptr> constant_lhs_array( - new Array2D({{1.0, 2.0}, - {3.0, 4.0}, - {5.0, 6.0}, - {6.0, 5.0}, - {4.0, 3.0}, - {2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - // Dot result to slice from: {{132, 129, 126}, {126, 129, 132}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({0, 1}); - auto dynamic_slice = - builder.DynamicSlice(rhs_constant, start_constant, {6, 1}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(0); - dot_dnums.add_rhs_contracting_dimensions(0); - auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); - - Array2D expected({{129.0}, {129.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU( - DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSCols)))) { - std::unique_ptr> constant_lhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0, 9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - // Dot result to slice from: {{91, 168, 56}, {56, 168, 91}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({1, 0}); - auto dynamic_slice = - builder.DynamicSlice(lhs_constant, start_constant, {1, 6}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(1); - auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); - - Array2D expected({{56.0, 168.0, 91.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. -TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU( - DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSCols)))) { - std::unique_ptr> constant_lhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0, 9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - // Dot result to slice from: {{91, 168, 56}, {56, 168, 91}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({1, 0}); - auto dynamic_slice = - builder.DynamicSlice(rhs_constant, start_constant, {1, 6}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(1); - auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); - - Array2D expected({{168.0}, {168.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} } // namespace } // namespace xla -- GitLab From 203caffbee9470109e3f750ba847e0aa4894a1e6 Mon Sep 17 00:00:00 2001 From: Rajendra arora Date: Fri, 16 Feb 2018 10:56:25 +0530 Subject: [PATCH 266/629] Documentation api reference badge added in Readme.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 916e5200b2..efacf063e3 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,10 @@ ----------------- -| **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** | -|-----------------|---------------------|------------------|-------------------|---------------| -| [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-cpu)](https://ci.tensorflow.org/job/tensorflow-master-cpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-linux-gpu)](https://ci.tensorflow.org/job/tensorflow-master-linux-gpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-mac)](https://ci.tensorflow.org/job/tensorflow-master-mac) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) | + +| **`Documentation`** | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** | +|-----------------|---------------------|------------------|-------------------|---------------|---------------| +| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-cpu)](https://ci.tensorflow.org/job/tensorflow-master-cpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-linux-gpu)](https://ci.tensorflow.org/job/tensorflow-master-linux-gpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-mac)](https://ci.tensorflow.org/job/tensorflow-master-mac) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) **TensorFlow** is an open source software library for numerical computation using data flow graphs. The graph nodes represent mathematical operations, while -- GitLab From 4e7772e0c74a663809f9fcf39545032eb8277e6a Mon Sep 17 00:00:00 2001 From: Rajendra arora Date: Fri, 16 Feb 2018 11:48:10 +0530 Subject: [PATCH 267/629] Added a contribution guideline header in readme.md --- README.md | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index efacf063e3..ef5bdc66ef 100644 --- a/README.md +++ b/README.md @@ -22,20 +22,6 @@ organization for the purposes of conducting machine learning and deep neural networks research. The system is general enough to be applicable in a wide variety of other domains, as well. -**If you want to contribute to TensorFlow, be sure to review the [contribution -guidelines](CONTRIBUTING.md). This project adheres to TensorFlow's -[code of conduct](CODE_OF_CONDUCT.md). By participating, you are expected to -uphold this code.** - -**We use [GitHub issues](https://github.com/tensorflow/tensorflow/issues) for -tracking requests and bugs. So please see -[TensorFlow Discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) for general questions -and discussion, and please direct specific questions to [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow).** - -The TensorFlow project strives to abide by generally accepted best practices in open-source software development: - -[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486) - ## Installation *See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.* @@ -76,6 +62,22 @@ $ python >>> sess.close() ``` +## Contribution guidelines + +**If you want to contribute to TensorFlow, be sure to review the [contribution +guidelines](CONTRIBUTING.md). This project adheres to TensorFlow's +[code of conduct](CODE_OF_CONDUCT.md). By participating, you are expected to +uphold this code.** + +**We use [GitHub issues](https://github.com/tensorflow/tensorflow/issues) for +tracking requests and bugs. So please see +[TensorFlow Discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) for general questions +and discussion, and please direct specific questions to [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow).** + +The TensorFlow project strives to abide by generally accepted best practices in open-source software development: + +[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486) + ## For more information * [TensorFlow Website](https://www.tensorflow.org) -- GitLab From f08155b7256e59f265a38d30de21ed2ced9d5ffa Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Thu, 15 Feb 2018 22:21:02 -0800 Subject: [PATCH 268/629] Make the default values for experimental and non experimental apis match. PiperOrigin-RevId: 185952648 --- .../contrib/quantize/python/quantize_graph.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index 0dfe78fd02..5a3a74cec4 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -69,7 +69,7 @@ def _create_graph(input_graph=None, activation_bits=activation_bits) -def create_training_graph(input_graph=None, quant_delay=250000): +def create_training_graph(input_graph=None, quant_delay=0): """Rewrites a training input_graph in place for simulated quantization. The graph has fake quantization ops inserted to simulate the error @@ -77,6 +77,14 @@ def create_training_graph(input_graph=None, quant_delay=250000): the expected behavior of previously held references to nodes and tensors may change. + The default value of quant_delay is suitable for finetuning an already trained + floating point model (recommended). + If one wants to train a quantized model from scratch, quant_delay should be + set to the number of steps it take the floating point model to converge. + Quantization will be activated at this point and effectively finetune the + model. If quant_delay is not provided when training from scratch, training can + often fail. + Args: input_graph: The tf.Graph to be transformed. quant_delay: Number of steps after which weights and activations are @@ -93,12 +101,12 @@ def create_training_graph(input_graph=None, quant_delay=250000): # Corresponds to case of restoring from a floating point checkpoint # In this case, we can freeze the moving mean and variance early on and # switch to using them during training. Therefore, freeze_bn_delay is set to - # 200000 - freeze_bn_delay = 200000 + # 2e5. + freeze_bn_delay = int(2e5) else: # If training from scratch, set freeze_bn_delay to 100 epochs after quant # delay. With a batch size of 64, this corresponds to 20000*100=2M steps. - freeze_bn_delay = quant_delay + 2000000 + freeze_bn_delay = quant_delay + int(2e6) _create_graph( input_graph=input_graph, @@ -129,8 +137,8 @@ def create_eval_graph(input_graph=None): def experimental_create_training_graph(input_graph=None, weight_bits=8, activation_bits=8, - quant_delay=250000, - freeze_bn_delay=500000): + quant_delay=0, + freeze_bn_delay=int(2e5)): """Rewrites a training input_graph in place for simulated quantization. This function has additional experimental options not (yet) available to @@ -141,6 +149,14 @@ def experimental_create_training_graph(input_graph=None, the expected behavior of previously held references to nodes and tensors may change. + The default value of quant_delay is suitable for finetuning an already trained + floating point model (recommended). + If one wants to train a quantized model from scratch, quant_delay should be + set to the number of steps it take the floating point model to converge. + Quantization will be activated at this point and effectively finetune the + model. If quant_delay is not provided when training from scratch, training can + often fail. + Args: input_graph: The tf.Graph to be transformed,if None then defaults to the default graph. -- GitLab From d536c5de09276ae935981a498c6ac46006646809 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 15 Feb 2018 23:44:47 -0800 Subject: [PATCH 269/629] Code generator for builtin_ops.h, and a test to ensure its consistency PiperOrigin-RevId: 185957720 --- tensorflow/contrib/lite/builtin_ops.h | 80 +++++++++++ .../lite/schema/builtin_ops_header/BUILD | 43 ++++++ .../lite/schema/builtin_ops_header/README.md | 12 ++ .../builtin_ops_header/consistency_test.cc | 47 +++++++ .../schema/builtin_ops_header/generate.cc | 25 ++++ .../schema/builtin_ops_header/generator.cc | 132 ++++++++++++++++++ .../schema/builtin_ops_header/generator.h | 38 +++++ .../builtin_ops_header/generator_test.cc | 63 +++++++++ 8 files changed, 440 insertions(+) create mode 100644 tensorflow/contrib/lite/builtin_ops.h create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/BUILD create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/README.md create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/consistency_test.cc create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/generate.cc create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/generator.h create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/generator_test.cc diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h new file mode 100644 index 0000000000..4ebd1586de --- /dev/null +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -0,0 +1,80 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ +#define TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ + +// DO NOT EDIT MANUALLY: This file is automatically generated by +// `schema_builtin_ops_header_generator.py`. + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +typedef enum { + kTfLiteBuiltinAdd = 0, + kTfLiteBuiltinAveragePool2d = 1, + kTfLiteBuiltinConcatenation = 2, + kTfLiteBuiltinConv2d = 3, + kTfLiteBuiltinDepthwiseConv2d = 4, + kTfLiteBuiltinEmbeddingLookup = 7, + kTfLiteBuiltinFullyConnected = 9, + kTfLiteBuiltinHashtableLookup = 10, + kTfLiteBuiltinL2Normalization = 11, + kTfLiteBuiltinL2Pool2d = 12, + kTfLiteBuiltinLocalResponseNormalization = 13, + kTfLiteBuiltinLogistic = 14, + kTfLiteBuiltinLshProjection = 15, + kTfLiteBuiltinLstm = 16, + kTfLiteBuiltinMaxPool2d = 17, + kTfLiteBuiltinMul = 18, + kTfLiteBuiltinRelu = 19, + kTfLiteBuiltinReluN1To1 = 20, + kTfLiteBuiltinRelu6 = 21, + kTfLiteBuiltinReshape = 22, + kTfLiteBuiltinResizeBilinear = 23, + kTfLiteBuiltinRnn = 24, + kTfLiteBuiltinSoftmax = 25, + kTfLiteBuiltinSpaceToDepth = 26, + kTfLiteBuiltinSvdf = 27, + kTfLiteBuiltinTanh = 28, + kTfLiteBuiltinConcatEmbeddings = 29, + kTfLiteBuiltinSkipGram = 30, + kTfLiteBuiltinCall = 31, + kTfLiteBuiltinCustom = 32, + kTfLiteBuiltinEmbeddingLookupSparse = 33, + kTfLiteBuiltinPad = 34, + kTfLiteBuiltinUnidirectionalSequenceRnn = 35, + kTfLiteBuiltinGather = 36, + kTfLiteBuiltinBatchToSpaceNd = 37, + kTfLiteBuiltinSpaceToBatchNd = 38, + kTfLiteBuiltinTranspose = 39, + kTfLiteBuiltinMean = 40, + kTfLiteBuiltinSub = 41, + kTfLiteBuiltinDiv = 42, + kTfLiteBuiltinSqueeze = 43, + kTfLiteBuiltinUnidirectionalSequenceLstm = 44, + kTfLiteBuiltinStridedSlice = 45, + kTfLiteBuiltinBidirectionalSequenceRnn = 46, + kTfLiteBuiltinExp = 47, + kTfLiteBuiltinTopkV2 = 48, + kTfLiteBuiltinSplit = 49, +} TfLiteBuiltinOperator; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ +} diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/BUILD b/tensorflow/contrib/lite/schema/builtin_ops_header/BUILD new file mode 100644 index 0000000000..0148149a6a --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/BUILD @@ -0,0 +1,43 @@ +package(default_visibility = [ + "//visibility:public", +]) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "generator", + srcs = ["generator.cc"], + hdrs = ["generator.h"], + deps = [ + "//tensorflow/contrib/lite/schema:schema_fbs", + ], +) + +cc_binary( + name = "generate", + srcs = ["generate.cc"], + deps = [ + ":generator", + ], +) + +cc_test( + name = "generator_test", + srcs = ["generator_test.cc"], + deps = [ + ":generator", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "consistency_test", + srcs = ["consistency_test.cc"], + data = [ + "//tensorflow/contrib/lite:builtin_ops.h", + ], + deps = [ + ":generator", + "@com_google_googletest//:gtest", + ], +) diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/README.md b/tensorflow/contrib/lite/schema/builtin_ops_header/README.md new file mode 100644 index 0000000000..f20d4f664e --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/README.md @@ -0,0 +1,12 @@ +# Builtin Ops Header Generator. + +This directory contains a code generator to generate a pure C header for +builtin op definition. + +Whenever you add a new builtin op, please execute: + +```sh +bazel run \ + //tensorflow/contrib/lite/schema/builtin_ops_header:generate > \ + tensorflow/contrib/lite/builtin_ops.h +``` diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/consistency_test.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/consistency_test.cc new file mode 100644 index 0000000000..d55c125c11 --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/consistency_test.cc @@ -0,0 +1,47 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include "tensorflow/contrib/lite/schema/builtin_ops_header/generator.h" + +namespace { + +const char* kHeaderFileName = + "tensorflow/contrib/lite/builtin_ops.h"; + +// The test ensures that `builtin_ops.h` is consistent with the FlatBuffer +// schema definition. When the schema is modified, it's required to run the +// generator to re-generate the header. +// Please see README.md for more details. +TEST(BuiltinOpsHeaderTest, TestConsistency) { + std::ifstream input_stream(kHeaderFileName, std::ios::binary); + ASSERT_TRUE(input_stream); + std::string file_content((std::istreambuf_iterator(input_stream)), + std::istreambuf_iterator()); + + std::ostringstream output_stream; + tflite::builtin_ops_header::GenerateHeader(output_stream); + std::string generated_content = output_stream.str(); + + EXPECT_EQ(file_content, generated_content); +} + +} // anonymous namespace + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generate.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generate.cc new file mode 100644 index 0000000000..72a28987b8 --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generate.cc @@ -0,0 +1,25 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/schema/builtin_ops_header/generator.h" + +// This executable is used to generate builtin_ops.h in TensorFlow Lite. +// Please see README.md for more details. +int main() { + if (!tflite::builtin_ops_header::GenerateHeader(std::cout)) { + std::cerr << "Failed to generate the header file.\n"; + } + return 0; +} diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc new file mode 100644 index 0000000000..b983d59d85 --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc @@ -0,0 +1,132 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/schema/builtin_ops_header/generator.h" +#include "tensorflow/contrib/lite/schema/schema_generated.h" + +namespace tflite { +namespace builtin_ops_header { + +namespace { +const char* kFileHeader = + R"(/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ +#define TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ + +// DO NOT EDIT MANUALLY: This file is automatically generated by +// `schema_builtin_ops_header_generator.py`. + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +typedef enum { +)"; + +const char* kFileFooter = + R"(} TfLiteBuiltinOperator; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ +} +)"; +} // anonymous namespace + +bool IsValidInputEnumName(const std::string& name) { + const char* begin = name.c_str(); + const char* ch = begin; + while (*ch != '\0') { + // If it's not the first character, expect an underscore. + if (ch != begin) { + if (*ch != '_') { + return false; + } + ++ch; + } + + // Expecting a word with upper case letters or digits, like "CONV", + // "CONV2D", "2D"...etc. + bool empty = true; + while (isupper(*ch) || isdigit(*ch)) { + // It's not empty if at least one character is consumed. + empty = false; + ++ch; + } + if (empty) { + return false; + } + } + return true; +} + +std::string ConstantizeVariableName(const std::string& name) { + std::string result = "kTfLiteBuiltin"; + bool uppercase = true; + for (char input_char : name) { + if (input_char == '_') { + uppercase = true; + } else if (uppercase) { + result += toupper(input_char); + uppercase = false; + } else { + result += tolower(input_char); + } + } + + return result; +} + +bool GenerateHeader(std::ostream& os) { + auto enum_names = tflite::EnumNamesBuiltinOperator(); + + // Check if all the input enum names are valid. + for (auto enum_value : EnumValuesBuiltinOperator()) { + auto enum_name = enum_names[enum_value]; + if (!IsValidInputEnumName(enum_name)) { + std::cerr << "Invalid input enum name: " << enum_name << std::endl; + return false; + } + } + + os << kFileHeader; + for (auto enum_value : EnumValuesBuiltinOperator()) { + auto enum_name = enum_names[enum_value]; + os << " "; + os << ConstantizeVariableName(enum_name); + os << " = "; + os << enum_value; + os << ",\n"; + } + os << kFileFooter; + return true; +} + +} // namespace builtin_ops_header +} // namespace tflite diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.h b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.h new file mode 100644 index 0000000000..3241ff83d5 --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.h @@ -0,0 +1,38 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// An utility library to generate pure C header for builtin ops definition. +#ifndef TENSORFLOW_CONTRIB_LITE_SCHEMA_BUILTIN_OPS_HEADER_GENERATOR_H_ +#define TENSORFLOW_CONTRIB_LITE_SCHEMA_BUILTIN_OPS_HEADER_GENERATOR_H_ + +#include + +namespace tflite { +namespace builtin_ops_header { + +// Check if the input enum name (from the Flatbuffer definition) is valid. +bool IsValidInputEnumName(const std::string& name); + +// Convert the enum name from Flatbuffer convention to C enum name convention. +// E.g. `L2_POOL_2D` becomes `kTfLiteBuiltinL2Pool2d`. +std::string ConstantizeVariableName(const std::string& name); + +// The function generates a pure C header for builtin ops definition, and write +// it to the output stream. +bool GenerateHeader(std::ostream& os); + +} // namespace builtin_ops_header +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_SCHEMA_BUILTIN_OPS_HEADER_GENERATOR_H_ diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator_test.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator_test.cc new file mode 100644 index 0000000000..a7dc8e1b04 --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator_test.cc @@ -0,0 +1,63 @@ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/schema/builtin_ops_header/generator.h" +#include +#include + +namespace { + +using tflite::builtin_ops_header::ConstantizeVariableName; +using tflite::builtin_ops_header::IsValidInputEnumName; + +TEST(TestIsValidInputEnumName, TestWithValidInputNames) { + EXPECT_TRUE(IsValidInputEnumName("ADD")); + EXPECT_TRUE(IsValidInputEnumName("CONV_2D")); + EXPECT_TRUE(IsValidInputEnumName("L2_POOL_2D")); +} + +TEST(TestIsValidInputEnumName, TestWithLeadingUnderscore) { + EXPECT_FALSE(IsValidInputEnumName("_ADD")); + EXPECT_FALSE(IsValidInputEnumName("_CONV_2D")); +} + +TEST(TestIsValidInputEnumName, TestWithLowerCase) { + EXPECT_FALSE(IsValidInputEnumName("_AdD")); + EXPECT_FALSE(IsValidInputEnumName("_COnV_2D")); +} + +TEST(TestIsValidInputEnumName, TestWithOtherCharacters) { + EXPECT_FALSE(IsValidInputEnumName("_AdD!2D")); + EXPECT_FALSE(IsValidInputEnumName("_COnV?2D")); +} + +TEST(TestIsValidInputEnumName, TestWithDoubleUnderscores) { + EXPECT_FALSE(IsValidInputEnumName("ADD__2D")); + EXPECT_FALSE(IsValidInputEnumName("CONV__2D")); +} + +TEST(TestConstantizeVariableName, TestWithValidInputNames) { + EXPECT_EQ(ConstantizeVariableName("ADD"), "kTfLiteBuiltinAdd"); + EXPECT_EQ(ConstantizeVariableName("CONV_2D"), "kTfLiteBuiltinConv2d"); + EXPECT_EQ(ConstantizeVariableName("L2_POOL_2D"), "kTfLiteBuiltinL2Pool2d"); +} + +} // anonymous namespace + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} -- GitLab From ecfcf07a418a526e1a6cb2d9c8d6a5bd9d46d430 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Feb 2018 01:53:59 -0800 Subject: [PATCH 270/629] Remove a possible ambiguity in the `py_func` documentation. PiperOrigin-RevId: 185968663 --- tensorflow/python/ops/script_ops.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 61e14adf4b..0ba29cbf32 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -267,7 +267,7 @@ def py_func(func, inp, Tout, stateful=True, name=None): """Wraps a python function and uses it as a TensorFlow op. Given a python function `func`, which takes numpy arrays as its - inputs and returns numpy arrays as its outputs, wrap this function as an + arguments and returns numpy arrays as its outputs, wrap this function as an operation in a TensorFlow graph. The following snippet constructs a simple TensorFlow graph that invokes the `np.sinh()` NumPy function as a operation in the graph: @@ -276,8 +276,8 @@ def py_func(func, inp, Tout, stateful=True, name=None): def my_func(x): # x will be a numpy array with the contents of the placeholder below return np.sinh(x) - inp = tf.placeholder(tf.float32) - y = tf.py_func(my_func, [inp], tf.float32) + input = tf.placeholder(tf.float32) + y = tf.py_func(my_func, [input], tf.float32) ``` **N.B.** The `tf.py_func()` operation has the following known limitations: @@ -293,10 +293,12 @@ def py_func(func, inp, Tout, stateful=True, name=None): server (e.g. using `with tf.device():`). Args: - func: A Python function, which accepts a list of NumPy `ndarray` objects - having element types that match the corresponding `tf.Tensor` objects - in `inp`, and returns a list of `ndarray` objects (or a single `ndarray`) - having element types that match the corresponding values in `Tout`. + func: A Python function, which accepts `ndarray` objects as arguments and + returns a list of `ndarray` objects (or a single `ndarray`). This function + must accept as many arguments as there are tensors in `inp`, and these + argument types will match the corresponding `tf.Tensor` objects + in `inp`. The returns `ndarray`s must match the number and types defined + `Tout`. Important Note: Input and output numpy `ndarray`s of `func` are not guaranteed to be copies. In some cases their underlying memory will be shared with the corresponding TensorFlow tensors. -- GitLab From f4d95b4abc45645ff5ed1670abc73fe0ffe49a82 Mon Sep 17 00:00:00 2001 From: kdavis-mozilla Date: Fri, 16 Feb 2018 11:41:19 +0100 Subject: [PATCH 271/629] Added Deep Speech use --- tensorflow/docs_src/about/uses.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/docs_src/about/uses.md b/tensorflow/docs_src/about/uses.md index 8818177a28..d646880bd3 100644 --- a/tensorflow/docs_src/about/uses.md +++ b/tensorflow/docs_src/about/uses.md @@ -22,6 +22,14 @@ This section describes some of the current uses of the TensorFlow system. > TensorFlow, or even better, send us a pull request to add an entry to this > file. +* **Deep Speech** +
      +
    • **Organization**: Mozilla
    • +
    • **Domain**: Speech Recognition
    • +
    • **Description**: A TensorFlow implementation motivated by Baidu's Deep Speech architecture.
    • +
    • **More info**: [GitHub Repo](https://github.com/mozilla/deepspeech)
    • +
    + * **RankBrain**