From 6d96cc4d05fd09e5663853a795bcd9a5b01f1732 Mon Sep 17 00:00:00 2001 From: hehongliang Date: Sat, 14 Jul 2018 23:58:23 +0000 Subject: [PATCH 001/351] Register gradient function for DepthwiseConv2dNativeBackpropInput and DepthwiseConv2dNativeBackpropFilter --- tensorflow/python/ops/nn_grad.py | 51 +++++++++++++++++ tensorflow/python/ops/nn_grad_test.py | 81 +++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 3a41391340..252447bcac 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -85,6 +85,57 @@ def _Conv2DBackpropFilterGrad(op, grad): data_format=op.get_attr("data_format")) ] +@ops.RegisterGradient("DepthwiseConv2dNativeBackpropInput") +def _DepthwiseConv2dNativeBackpropInputGrad(op, grad): + """The derivatives for deconvolution. + + Args: + op: the Deconvolution op. + grad: the tensor representing the gradient w.r.t. the output + + Returns: + the gradients w.r.t. 
the input and the filter + """ + return [ + None, + nn_ops.depthwise_conv2d_native_backprop_filter( + grad, + array_ops.shape(op.inputs[1]), + op.inputs[2], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")), + nn_ops.depthwise_conv2d_native( + grad, + op.inputs[1], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")) + ] + + +@ops.RegisterGradient("DepthwiseConv2dNativeBackpropFilter") +def _DepthwiseConv2dNativeBackpropFilterGrad(op, grad): + return [ + nn_ops.depthwise_conv2d_native_backprop_input( + array_ops.shape(op.inputs[0]), + grad, + op.inputs[2], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")), None, + nn_ops.depthwise_conv2d_native( + op.inputs[0], + grad, + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")) + ] + @ops.RegisterGradient("Conv3D") def _Conv3DGrad(op, grad): diff --git a/tensorflow/python/ops/nn_grad_test.py b/tensorflow/python/ops/nn_grad_test.py index 49d54beb20..729c042858 100644 --- a/tensorflow/python/ops/nn_grad_test.py +++ b/tensorflow/python/ops/nn_grad_test.py @@ -26,6 +26,8 @@ from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn_grad # pylint: disable=unused-import from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import nn_impl +from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -47,5 +49,84 @@ class Relu6OpTest(test.TestCase): self.assertLess(error, 1e-4) +class Conv2dOpTest(test.TestCase): + + def run_test(self, x, y): + with self.test_session(): + error = gradient_checker.compute_gradient_error( + x, + 
x.get_shape().as_list(), + y, + y.get_shape().as_list()) + self.assertLess(error, 1e-3) + + + def testConv2dGradWRTInput(self): + input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + y = nn_ops.conv2d(input, filter, [1,1,1,1], "SAME") + self.run_test(input, y) + + def testConv2dGradWRTFilter(self): + input = constant_op.constant([0.5], dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = array_ops.placeholder(dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + y = nn_ops.conv2d(input, filter, [1,1,1,1], "SAME") + self.run_test(filter, y) + + def testConv2dBackpropFilterGrad(self): + input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + strides = [1,1,1,1] + padding = "SAME" + out = nn_impl.depthwise_conv2d(input, filter, strides, padding) + + grad_wrt_input = gradients_impl.gradients(out, input)[0] + self.run_test(filter, grad_wrt_input) + + grad_wrt_filter = gradients_impl.gradients(out, filter)[0] + self.run_test(input, grad_wrt_filter) + + +class DepthwiseConv2dTest(test.TestCase): + + def run_test(self, x, y): + with self.test_session(): + error = gradient_checker.compute_gradient_error( + x, + x.get_shape().as_list(), + y, + y.get_shape().as_list()) + self.assertLess(error, 1e-3) + + def testDepthwiseConv2dGradWRTInput(self): + input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + strides = [1,1,1,1] + padding = "SAME" + y = nn_impl.depthwise_conv2d(input, filter, strides, padding) + self.run_test(input, y) + + def testDepthwiseConv2dGradWRTFilter(self): + input = constant_op.constant([0.5], dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = 
array_ops.placeholder(dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + strides = [1,1,1,1] + padding = "SAME" + y = nn_impl.depthwise_conv2d(input, filter, strides, padding) + self.run_test(filter, y) + + def testDepthwiseConv2dBackpropFilterGrad(self): + input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') + filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') + strides = [1,1,1,1] + padding = "SAME" + out = nn_impl.depthwise_conv2d(input, filter, strides, padding) + + grad_wrt_input = gradients_impl.gradients(out, input)[0] + self.run_test(filter, grad_wrt_input) + + grad_wrt_filter = gradients_impl.gradients(out, filter)[0] + self.run_test(input, grad_wrt_filter) + + if __name__ == "__main__": test.main() -- GitLab From 4062d414975e3637983738a449efb536d226793e Mon Sep 17 00:00:00 2001 From: hehongliang Date: Tue, 17 Jul 2018 06:29:28 +0000 Subject: [PATCH 002/351] modify code by google python style --- tensorflow/python/ops/nn_grad.py | 72 ++++++------ tensorflow/python/ops/nn_grad_test.py | 153 ++++++++++++++------------ 2 files changed, 118 insertions(+), 107 deletions(-) mode change 100644 => 100755 tensorflow/python/ops/nn_grad.py mode change 100644 => 100755 tensorflow/python/ops/nn_grad_test.py diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py old mode 100644 new mode 100755 index 252447bcac..77283d2488 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -87,7 +87,7 @@ def _Conv2DBackpropFilterGrad(op, grad): @ops.RegisterGradient("DepthwiseConv2dNativeBackpropInput") def _DepthwiseConv2dNativeBackpropInputGrad(op, grad): - """The derivatives for deconvolution. + """The derivatives for deconvolution. Args: op: the Deconvolution op. @@ -96,45 +96,45 @@ def _DepthwiseConv2dNativeBackpropInputGrad(op, grad): Returns: the gradients w.r.t. 
the input and the filter """ - return [ - None, - nn_ops.depthwise_conv2d_native_backprop_filter( - grad, - array_ops.shape(op.inputs[1]), - op.inputs[2], - dilations=op.get_attr("dilations"), - strides=op.get_attr("strides"), - padding=op.get_attr("padding"), - data_format=op.get_attr("data_format")), - nn_ops.depthwise_conv2d_native( - grad, - op.inputs[1], - dilations=op.get_attr("dilations"), - strides=op.get_attr("strides"), - padding=op.get_attr("padding"), - data_format=op.get_attr("data_format")) - ] + return [ + None, + nn_ops.depthwise_conv2d_native_backprop_filter( + grad, + array_ops.shape(op.inputs[1]), + op.inputs[2], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")), + nn_ops.depthwise_conv2d_native( + grad, + op.inputs[1], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")) + ] @ops.RegisterGradient("DepthwiseConv2dNativeBackpropFilter") def _DepthwiseConv2dNativeBackpropFilterGrad(op, grad): - return [ - nn_ops.depthwise_conv2d_native_backprop_input( - array_ops.shape(op.inputs[0]), - grad, - op.inputs[2], - dilations=op.get_attr("dilations"), - strides=op.get_attr("strides"), - padding=op.get_attr("padding"), - data_format=op.get_attr("data_format")), None, - nn_ops.depthwise_conv2d_native( - op.inputs[0], - grad, - dilations=op.get_attr("dilations"), - strides=op.get_attr("strides"), - padding=op.get_attr("padding"), - data_format=op.get_attr("data_format")) - ] + return [ + nn_ops.depthwise_conv2d_native_backprop_input( + array_ops.shape(op.inputs[0]), + grad, + op.inputs[2], + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), + padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")), None, + nn_ops.depthwise_conv2d_native( + op.inputs[0], + grad, + dilations=op.get_attr("dilations"), + strides=op.get_attr("strides"), 
+ padding=op.get_attr("padding"), + data_format=op.get_attr("data_format")) + ] @ops.RegisterGradient("Conv3D") diff --git a/tensorflow/python/ops/nn_grad_test.py b/tensorflow/python/ops/nn_grad_test.py old mode 100644 new mode 100755 index 729c042858..b3df06fcd2 --- a/tensorflow/python/ops/nn_grad_test.py +++ b/tensorflow/python/ops/nn_grad_test.py @@ -51,81 +51,92 @@ class Relu6OpTest(test.TestCase): class Conv2dOpTest(test.TestCase): - def run_test(self, x, y): - with self.test_session(): - error = gradient_checker.compute_gradient_error( - x, - x.get_shape().as_list(), - y, - y.get_shape().as_list()) - self.assertLess(error, 1e-3) - - - def testConv2dGradWRTInput(self): - input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - y = nn_ops.conv2d(input, filter, [1,1,1,1], "SAME") - self.run_test(input, y) - - def testConv2dGradWRTFilter(self): - input = constant_op.constant([0.5], dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = array_ops.placeholder(dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - y = nn_ops.conv2d(input, filter, [1,1,1,1], "SAME") - self.run_test(filter, y) - - def testConv2dBackpropFilterGrad(self): - input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - strides = [1,1,1,1] - padding = "SAME" - out = nn_impl.depthwise_conv2d(input, filter, strides, padding) - - grad_wrt_input = gradients_impl.gradients(out, input)[0] - self.run_test(filter, grad_wrt_input) - - grad_wrt_filter = gradients_impl.gradients(out, filter)[0] - self.run_test(input, grad_wrt_filter) + def run_test(self, x, y): + with self.test_session(): + error = gradient_checker.compute_gradient_error( + x, + x.get_shape().as_list(), + y, + y.get_shape().as_list()) + self.assertLess(error, 1e-3) + + def 
testConv2dGradWRTInput(self): + x = array_ops.placeholder( + dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + y = nn_ops.conv2d(x, f, [1, 1, 1, 1], "SAME") + self.run_test(x, y) + + def testConv2dGradWRTFilter(self): + x = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = array_ops.placeholder( + dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + y = nn_ops.conv2d(x, f, [1, 1, 1, 1], "SAME") + self.run_test(f, y) + + def testConv2dBackpropFilterGrad(self): + x = array_ops.placeholder( + dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + strides = [1, 1, 1, 1] + padding = "SAME" + out = nn_impl.depthwise_conv2d(x, f, strides, padding) + + grad_wrt_input = gradients_impl.gradients(out, x)[0] + self.run_test(f, grad_wrt_input) + + grad_wrt_filter = gradients_impl.gradients(out, f)[0] + self.run_test(x, grad_wrt_filter) class DepthwiseConv2dTest(test.TestCase): - def run_test(self, x, y): - with self.test_session(): - error = gradient_checker.compute_gradient_error( - x, - x.get_shape().as_list(), - y, - y.get_shape().as_list()) - self.assertLess(error, 1e-3) - - def testDepthwiseConv2dGradWRTInput(self): - input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - strides = [1,1,1,1] - padding = "SAME" - y = nn_impl.depthwise_conv2d(input, filter, strides, padding) - self.run_test(input, y) - - def testDepthwiseConv2dGradWRTFilter(self): - input = constant_op.constant([0.5], dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = array_ops.placeholder(dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - strides = [1,1,1,1] - padding = "SAME" - y = nn_impl.depthwise_conv2d(input, 
filter, strides, padding) - self.run_test(filter, y) - - def testDepthwiseConv2dBackpropFilterGrad(self): - input = array_ops.placeholder(dtype = dtypes.float32, shape=[1,4,4,3], name='input') - filter = constant_op.constant([0.5], dtype = dtypes.float32, shape=[2,2,3,2], name='filter') - strides = [1,1,1,1] - padding = "SAME" - out = nn_impl.depthwise_conv2d(input, filter, strides, padding) - - grad_wrt_input = gradients_impl.gradients(out, input)[0] - self.run_test(filter, grad_wrt_input) - - grad_wrt_filter = gradients_impl.gradients(out, filter)[0] - self.run_test(input, grad_wrt_filter) + def run_test(self, x, y): + with self.test_session(): + error = gradient_checker.compute_gradient_error( + x, + x.get_shape().as_list(), + y, + y.get_shape().as_list()) + self.assertLess(error, 1e-3) + + def testDepthwiseConv2dGradWRTInput(self): + x = array_ops.placeholder( + dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + strides = [1, 1, 1, 1] + padding = "SAME" + y = nn_impl.depthwise_conv2d(x, f, strides, padding) + self.run_test(x, y) + + def testDepthwiseConv2dGradWRTFilter(self): + x = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = array_ops.placeholder( + dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + strides = [1, 1, 1, 1] + padding = "SAME" + y = nn_impl.depthwise_conv2d(x, f, strides, padding) + self.run_test(f, y) + + def testDepthwiseConv2dBackpropFilterGrad(self): + x = array_ops.placeholder( + dtype=dtypes.float32, shape=[1, 4, 4, 3], name='input') + f = constant_op.constant( + [0.5], dtype=dtypes.float32, shape=[2, 2, 3, 2], name='filter') + strides = [1, 1, 1, 1] + padding = "SAME" + out = nn_impl.depthwise_conv2d(x, f, strides, padding) + + grad_wrt_input = gradients_impl.gradients(out, x)[0] + self.run_test(f, grad_wrt_input) + + grad_wrt_filter = gradients_impl.gradients(out, f)[0] + 
self.run_test(x, grad_wrt_filter) if __name__ == "__main__": -- GitLab From 0ade4eb41fea75de3eaf94075bcfa8009c3b2c4b Mon Sep 17 00:00:00 2001 From: hehongliang Date: Fri, 27 Jul 2018 02:23:15 +0000 Subject: [PATCH 003/351] change file permission back to 10064 --- tensorflow/python/ops/nn_grad.py | 0 tensorflow/python/ops/nn_grad_test.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tensorflow/python/ops/nn_grad.py mode change 100755 => 100644 tensorflow/python/ops/nn_grad_test.py diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py old mode 100755 new mode 100644 diff --git a/tensorflow/python/ops/nn_grad_test.py b/tensorflow/python/ops/nn_grad_test.py old mode 100755 new mode 100644 -- GitLab From c7c5f17a4671c51f0f5706b461d70b8573a1659d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 25 Aug 2018 10:58:02 +0800 Subject: [PATCH 004/351] TST: add test TensorShape for add_variance --- .../python/keras/engine/topology_test.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tensorflow/python/keras/engine/topology_test.py b/tensorflow/python/keras/engine/topology_test.py index 079c8dae71..fabcb8b055 100644 --- a/tensorflow/python/keras/engine/topology_test.py +++ b/tensorflow/python/keras/engine/topology_test.py @@ -42,6 +42,34 @@ except ImportError: class TopologyConstructionTest(test.TestCase): + def test_add_variable_supports_TensorShape(self): + + class MyLayer(keras.layers.Layer): + + def build(self, input_shape): + self.a = self.add_variable( + 'a', + tensor_shape.TensorShape([1, 2]), + 'float32') + self.b = self.add_variable( + 'b', + tensor_shape.TensorShape([1, 4]), + 'int32') + self.built = True + + def call(self, inputs): + return inputs + + x1 = input_layer_lib.Input(shape=(1,)) + # Github issue #21838: + # Won't raise exception here when constructing. 
+ layer = MyLayer() + _ = layer.apply(x1) + self.assertEqual(layer.a.get_shape(), + tensor_shape.TensorShape([1, 2])) + self.assertEqual(layer.b.get_shape(), + tensor_shape.TensorShape([1, 4])) + def test_get_updates(self): class MyLayer(keras.layers.Layer): -- GitLab From 24effed4c02c73958c7b3f535f31b09b97b85f4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 25 Aug 2018 10:58:29 +0800 Subject: [PATCH 005/351] TST: add test TensorShape for initializer --- tensorflow/python/ops/init_ops_test.py | 203 ++++++++++++++----------- 1 file changed, 111 insertions(+), 92 deletions(-) diff --git a/tensorflow/python/ops/init_ops_test.py b/tensorflow/python/ops/init_ops_test.py index 6a1fe17119..96e4258bdb 100644 --- a/tensorflow/python/ops/init_ops_test.py +++ b/tensorflow/python/ops/init_ops_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape as tensor_shape_lib from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test @@ -54,142 +55,160 @@ class InitializersTest(test.TestCase): self.assertGreater(lim, abs(output.min() - target_min)) def test_uniform(self): - tensor_shape = (9, 6, 7) + shape = [9, 6, 7] with self.cached_session(): - self._runner( - init_ops.RandomUniform(minval=-1, maxval=1, seed=124), - tensor_shape, - target_mean=0., - target_max=1, - target_min=-1) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner( + init_ops.RandomUniform(minval=-1, maxval=1, seed=124), + tensor_shape, + target_mean=0., + target_max=1, + target_min=-1) def test_normal(self): - tensor_shape = (8, 12, 99) + shape = (8, 12, 99) with self.cached_session(): - self._runner( - init_ops.RandomNormal(mean=0, stddev=1, seed=153), - tensor_shape, - target_mean=0., - target_std=1) + for 
tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner( + init_ops.RandomNormal(mean=0, stddev=1, seed=153), + tensor_shape, + target_mean=0., + target_std=1) def test_truncated_normal(self): - tensor_shape = (12, 99, 7) + shape = (12, 99, 7) with self.cached_session(): - self._runner( - init_ops.TruncatedNormal(mean=0, stddev=1, seed=126), - tensor_shape, - target_mean=0., - target_max=2, - target_min=-2) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner( + init_ops.TruncatedNormal(mean=0, stddev=1, seed=126), + tensor_shape, + target_mean=0., + target_max=2, + target_min=-2) def test_constant(self): - tensor_shape = (5, 6, 4) + shape = (5, 6, 4) with self.cached_session(): - self._runner( - init_ops.Constant(2), - tensor_shape, - target_mean=2, - target_max=2, - target_min=2) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner( + init_ops.Constant(2), + tensor_shape, + target_mean=2, + target_max=2, + target_min=2) def test_lecun_uniform(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(1. / fan_in) - self._runner( - init_ops.lecun_uniform(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, _ = init_ops._compute_fans(tensor_shape) + std = np.sqrt(1. / fan_in) + self._runner( + init_ops.lecun_uniform(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_glorot_uniform_initializer(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, fan_out = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. 
/ (fan_in + fan_out)) - self._runner( - init_ops.glorot_uniform_initializer(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, fan_out = init_ops._compute_fans(tensor_shape) + std = np.sqrt(2. / (fan_in + fan_out)) + self._runner( + init_ops.glorot_uniform_initializer(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_he_uniform(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. / fan_in) - self._runner( - init_ops.he_uniform(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, _ = init_ops._compute_fans(tensor_shape) + std = np.sqrt(2. / fan_in) + self._runner( + init_ops.he_uniform(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_lecun_normal(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(1. / fan_in) - self._runner( - init_ops.lecun_normal(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, _ = init_ops._compute_fans(tensor_shape) + std = np.sqrt(1. / fan_in) + self._runner( + init_ops.lecun_normal(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_glorot_normal_initializer(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, fan_out = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. / (fan_in + fan_out)) - self._runner( - init_ops.glorot_normal_initializer(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, fan_out = init_ops._compute_fans(tensor_shape) + std = np.sqrt(2. 
/ (fan_in + fan_out)) + self._runner( + init_ops.glorot_normal_initializer(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_he_normal(self): - tensor_shape = (5, 6, 4, 2) + shape = (5, 6, 4, 2) with self.cached_session(): - fan_in, _ = init_ops._compute_fans(tensor_shape) - std = np.sqrt(2. / fan_in) - self._runner( - init_ops.he_normal(seed=123), - tensor_shape, - target_mean=0., - target_std=std) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + fan_in, _ = init_ops._compute_fans(tensor_shape) + std = np.sqrt(2. / fan_in) + self._runner( + init_ops.he_normal(seed=123), + tensor_shape, + target_mean=0., + target_std=std) def test_Orthogonal(self): - tensor_shape = (20, 20) + shape = (20, 20) with self.cached_session(): - self._runner(init_ops.Orthogonal(seed=123), tensor_shape, target_mean=0.) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner(init_ops.Orthogonal(seed=123), + tensor_shape, + target_mean=0.) def test_Identity(self): with self.cached_session(): - tensor_shape = (3, 4, 5) - with self.assertRaises(ValueError): + shape = (3, 4, 5) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + with self.assertRaises(ValueError): + self._runner( + init_ops.Identity(), + tensor_shape, + target_mean=1. / int(tensor_shape[0]), + target_max=1.) + + shape = (3, 3) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: self._runner( init_ops.Identity(), tensor_shape, - target_mean=1. / tensor_shape[0], + target_mean=1. / int(tensor_shape[0]), target_max=1.) - tensor_shape = (3, 3) - self._runner( - init_ops.Identity(), - tensor_shape, - target_mean=1. / tensor_shape[0], - target_max=1.) - def test_Zeros(self): - tensor_shape = (4, 5) + shape = (4, 5) with self.cached_session(): - self._runner( - init_ops.Zeros(), tensor_shape, target_mean=0., target_max=0.) 
+ for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner( + init_ops.Zeros(), tensor_shape, target_mean=0., target_max=0.) def test_Ones(self): - tensor_shape = (4, 5) + shape = (4, 5) with self.cached_session(): - self._runner(init_ops.Ones(), tensor_shape, target_mean=1., target_max=1.) + for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: + self._runner(init_ops.Ones(), tensor_shape, + target_mean=1., target_max=1.) if __name__ == '__main__': -- GitLab From bd81c8b19e04e0d5f3f28ca73b7f7e2f1b11fdca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Sat, 25 Aug 2018 10:59:00 +0800 Subject: [PATCH 006/351] BUG: initializer should supports TensorShape --- tensorflow/python/ops/init_ops.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 4d75ee3974..3aaac615fb 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -38,6 +38,7 @@ import numpy as np from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops_impl from tensorflow.python.ops import gen_linalg_ops @@ -539,7 +540,8 @@ class Orthogonal(Initializer): num_rows = 1 for dim in shape[:-1]: num_rows *= dim - num_cols = shape[-1] + num_rows = int(num_rows) + num_cols = int(shape[-1]) flat_shape = (num_cols, num_rows) if num_rows < num_cols else (num_rows, num_cols) @@ -1107,6 +1109,8 @@ class Identity(Initializer): "Identity matrix initializer can only be used for 2D matrices.") if dtype is None: dtype = self.dtype + if isinstance(full_shape, tensor_shape.TensorShape): + full_shape = full_shape.as_list() initializer = linalg_ops_impl.eye(*full_shape, dtype=dtype) if partition_info is not None: initializer = 
array_ops.slice(initializer, partition_info.var_offset, @@ -1287,7 +1291,7 @@ def _compute_fans(shape): shape: Integer shape tuple or TF tensor shape. Returns: - A tuple of scalars (fan_in, fan_out). + A tuple of integer scalars (fan_in, fan_out). """ if len(shape) < 1: # Just to avoid errors for constants. fan_in = fan_out = 1 @@ -1299,12 +1303,12 @@ def _compute_fans(shape): else: # Assuming convolution kernels (2D, 3D, or more). # kernel shape: (..., input_depth, depth) - receptive_field_size = 1. + receptive_field_size = 1 for dim in shape[:-2]: receptive_field_size *= dim fan_in = shape[-2] * receptive_field_size fan_out = shape[-1] * receptive_field_size - return fan_in, fan_out + return int(fan_in), int(fan_out) def _assert_float_dtype(dtype): -- GitLab From 224b3e32b0ac552a0c2a083c803f60814efa850b Mon Sep 17 00:00:00 2001 From: Albin Joy Date: Mon, 21 Jan 2019 14:32:21 +0530 Subject: [PATCH 007/351] Removed twice declaration AttrTypeByName The interface AttrTypeByName was declared twice in attr_builder.h. --- tensorflow/core/common_runtime/eager/attr_builder.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/core/common_runtime/eager/attr_builder.h b/tensorflow/core/common_runtime/eager/attr_builder.h index aa64b5f59b..1b3fbcbd4a 100644 --- a/tensorflow/core/common_runtime/eager/attr_builder.h +++ b/tensorflow/core/common_runtime/eager/attr_builder.h @@ -54,10 +54,6 @@ Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out, Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name, TF_AttrType* out, unsigned char* is_list); -// Looks for 'attr_name' in 'm' and sets 'out' and 'is_list'. -Status AttrTypeByName(const AttrTypeMap& m, const string& attr_name, - TF_AttrType* out, unsigned char* is_list); - // KernelAndDevice::Init needs a NodeDef only to pass the attribute map through. 
// An AttrBuilder is a convenience class to help with that - providing a smaller // interface than NodeDefBuilder and avoiding expensive (unnecessary?) sanity -- GitLab From 70298632d0e4e2b5cd3b9270dd44b2d1bb24a874 Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Fri, 25 Jan 2019 10:54:35 +0800 Subject: [PATCH 008/351] Workaround MSVC bug that std::isnan cannot handle integral type --- .../xla/service/hlo_evaluator_typed_visitor.h | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index 648c7d0e67..9fac9bb318 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -916,9 +916,29 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return HandleShiftRightLogical(shrl); } - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> + // Special case for integral type due to MSVC's std::isnan being unable to + // handle integral type. 
+ template ::value && + std::is_intergal::value>::type* = + nullptr> + Status HandleClamp(HloInstruction* clamp) { + std::function + clamp_op = [](ElementwiseT low, ElementwiseT value, ElementwiseT high) { + return static_cast( + std::min(high, std::max(value, low))); + }; + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[clamp], + ElementwiseTernaryOp(clamp, + std::move(ConvertTernaryFunction(clamp_op)))); + return Status::OK(); + } + + template ::value && + !std::is_intergal::value>::type* = + nullptr> Status HandleClamp(HloInstruction* clamp) { std::function clamp_op = [](ElementwiseT low, ElementwiseT value, ElementwiseT high) { -- GitLab From e7ab0ddbf811866de60b8b317887f7000586b6d8 Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Sat, 26 Jan 2019 09:35:08 +0800 Subject: [PATCH 009/351] Add test for int64 clamping --- .../xla/service/hlo_evaluator_test.cc | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 590f76f472..94a1ce08d2 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -152,6 +152,33 @@ TEST_P(HloEvaluatorBf16Test, DoesClamp) { EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); } +// Verifies that clamping of int64 does not cause loss of precision +TEST_P(HloEvaluatorBf16Test, DoesClamp) { + auto ones = [](int bits) -> int64 { return (1LL << bits) - 1; }; + + auto low = + LiteralUtil::CreateR2({{0, ones(54)}, {ones(54), ones(58)}}); + auto value = LiteralUtil::CreateR2({{0, ones(56)}, {0, ones(58)}}); + auto high = LiteralUtil::CreateR2( + {{ones(54), ones(55)}, {ones(56), ones(58)}}); + + Shape shape = low.shape(); + HloComputation::Builder b(TestName()); + auto c1 = b.AddInstruction(HloInstruction::CreateConstant(std::move(low))); + auto c2 = b.AddInstruction(HloInstruction::CreateConstant(std::move(value))); + auto c3 = 
b.AddInstruction(HloInstruction::CreateConstant(std::move(high))); + b.AddInstruction( + HloInstruction::CreateTernary(shape, HloOpcode::kClamp, c1, c2, c3)); + m_->AddEntryComputation(b.Build()); + + Literal result = Evaluate(); + + auto expected = + LiteralUtil::CreateR2({{0, ones(55)}, {ones(54), ones(58)}}); + + EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); +} + TEST_P(HloEvaluatorBf16Test, DISABLED_DoesClampSpecialBroadcast) { auto low = LiteralUtil::CreateR0(0.f); auto value = LiteralUtil::CreateR2({{-1.f, 0.f}, {1.f, 2.f}}); -- GitLab From 16a51f86a10e0dd2ef5395f510b8ed0b696531b3 Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Sat, 26 Jan 2019 09:38:01 +0800 Subject: [PATCH 010/351] Fix test name --- tensorflow/compiler/xla/service/hlo_evaluator_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 94a1ce08d2..f5452c9bd3 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -153,7 +153,7 @@ TEST_P(HloEvaluatorBf16Test, DoesClamp) { } // Verifies that clamping of int64 does not cause loss of precision -TEST_P(HloEvaluatorBf16Test, DoesClamp) { +TEST_P(HloEvaluatorBf16Test, DoesClampInt64) { auto ones = [](int bits) -> int64 { return (1LL << bits) - 1; }; auto low = -- GitLab From a183c36fdf1e7bfb1585455517e6053aa61b867c Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Sat, 26 Jan 2019 09:48:33 +0800 Subject: [PATCH 011/351] Address nits --- tensorflow/compiler/xla/service/hlo_evaluator_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index f5452c9bd3..644dcd0ce7 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -154,7 +154,7 @@ 
TEST_P(HloEvaluatorBf16Test, DoesClamp) { // Verifies that clamping of int64 does not cause loss of precision TEST_P(HloEvaluatorBf16Test, DoesClampInt64) { - auto ones = [](int bits) -> int64 { return (1LL << bits) - 1; }; + auto ones = [](int bits) { return (int64{1} << bits) - 1; }; auto low = LiteralUtil::CreateR2({{0, ones(54)}, {ones(54), ones(58)}}); -- GitLab From 7b4860446e610cc704d44f077c40f931b2971ded Mon Sep 17 00:00:00 2001 From: Dayananda-V Date: Thu, 31 Jan 2019 10:45:39 +0530 Subject: [PATCH 012/351] TF Framework ops_test missing test case add 1-has_default_graph api test case 2-get_all_collection_keys api test case --- tensorflow/python/framework/ops_test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 8347e9d1eb..10a2ce9bde 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -1587,6 +1587,8 @@ class CollectionTest(test_util.TensorFlowTestCase): self.assertSequenceEqual(g.collections, ["key"]) g.add_to_collection("other", "foo") self.assertSequenceEqual(sorted(g.collections), ["key", "other"]) + self.assertSequenceEqual(sorted(g.get_all_collection_keys()), + ["key", "other"]) def test_add_to_collection(self): g = ops.Graph() @@ -2408,17 +2410,22 @@ class GraphTest(test_util.TensorFlowTestCase): def testDefaultGraph(self): orig = ops.get_default_graph() + self.assertFalse(ops.has_default_graph()) self._AssertDefault(orig) g0 = ops.Graph() + self.assertFalse(ops.has_default_graph()) self._AssertDefault(orig) context_manager_0 = g0.as_default() + self.assertFalse(ops.has_default_graph()) self._AssertDefault(orig) with context_manager_0 as g0: self._AssertDefault(g0) with ops.Graph().as_default() as g1: + self.assertTrue(ops.has_default_graph()) self._AssertDefault(g1) self._AssertDefault(g0) self._AssertDefault(orig) + self.assertFalse(ops.has_default_graph()) def testPreventFeeding(self): g = 
ops.Graph() -- GitLab From 058aa720d063795fbe3fa2c9aa26e95790fa349f Mon Sep 17 00:00:00 2001 From: Hoeseong Kim Date: Thu, 31 Jan 2019 15:48:13 +0900 Subject: [PATCH 013/351] added complex support for decode_raw --- tensorflow/core/kernels/decode_raw_op.cc | 2 ++ tensorflow/core/ops/parsing_ops.cc | 5 +++- .../python/kernel_tests/decode_raw_op_test.py | 26 +++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/decode_raw_op.cc b/tensorflow/core/kernels/decode_raw_op.cc index eaef5a6097..3dd019c3d2 100644 --- a/tensorflow/core/kernels/decode_raw_op.cc +++ b/tensorflow/core/kernels/decode_raw_op.cc @@ -110,6 +110,8 @@ REGISTER(uint8); REGISTER(int16); REGISTER(int8); REGISTER(int64); +REGISTER(complex64); +REGISTER(complex128); #undef REGISTER diff --git a/tensorflow/core/ops/parsing_ops.cc b/tensorflow/core/ops/parsing_ops.cc index eff453241d..169076a6f6 100644 --- a/tensorflow/core/ops/parsing_ops.cc +++ b/tensorflow/core/ops/parsing_ops.cc @@ -26,7 +26,10 @@ using shape_inference::ShapeHandle; REGISTER_OP("DecodeRaw") .Input("bytes: string") .Output("output: out_type") - .Attr("out_type: {half,float,double,int32,uint16,uint8,int16,int8,int64}") + .Attr( + "out_type: " + "{half,float,double,int32,uint16,uint8,int16,int8,int64,complex64," + "complex128}") .Attr("little_endian: bool = true") .SetShapeFn([](InferenceContext* c) { // Note: last dimension is data dependent. 
diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py index 008e59ba3e..bb8d2cf6a0 100644 --- a/tensorflow/python/kernel_tests/decode_raw_op_test.py +++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py @@ -89,6 +89,32 @@ class DecodeRawOpTest(test.TestCase): self.assertAllEqual(expected_result, result) + @test_util.run_deprecated_v1 + def testToComplex64(self): + with self.cached_session(): + in_bytes = array_ops.placeholder(dtypes.string, shape=[None]) + decode = parsing_ops.decode_raw(in_bytes, out_type=dtypes.complex64) + self.assertEqual([None, None], decode.get_shape().as_list()) + + expected_result = np.matrix([[1 + 1j, 2 - 2j, -3 + 3j, -4 - 4j]], + dtype=" Date: Sat, 2 Feb 2019 18:07:30 +0800 Subject: [PATCH 014/351] TST: minor fix --- tensorflow/python/ops/init_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/init_ops_test.py b/tensorflow/python/ops/init_ops_test.py index f562c59a97..0ef34976de 100644 --- a/tensorflow/python/ops/init_ops_test.py +++ b/tensorflow/python/ops/init_ops_test.py @@ -55,7 +55,7 @@ class InitializersTest(test.TestCase): self.assertGreater(lim, abs(output.min() - target_min)) def test_uniform(self): - shape = [9, 6, 7] + shape = (9, 6, 99) with self.cached_session(): for tensor_shape in [shape, tensor_shape_lib.TensorShape(shape)]: self._runner( -- GitLab From f2775491a138154e8f9898cbb5266ef49504e33e Mon Sep 17 00:00:00 2001 From: Grzegorz Pawelczak Date: Sat, 2 Feb 2019 12:23:09 +0000 Subject: [PATCH 015/351] Don't dump the subgraph fingerprint into info log --- tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc index ec745cdbb7..5de519b568 100644 --- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc 
+++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass.cc @@ -176,7 +176,7 @@ Status RewriteSubgraph(const std::vector& arg_source_tensors, string serialized; TF_RET_CHECK(SerializeToStringDeterministic(gdef, &serialized)); uint64 fingerprint = Fingerprint64(serialized); - LOG(INFO) << "Subgraph fingerprint:" << fingerprint; + VLOG(1) << "Subgraph fingerprint:" << fingerprint; call_def->set_op(absl::StrCat(call_def->op(), "_", fingerprint)); return Status::OK(); } -- GitLab From 3d13afe4ecd59396d5b438751cda1b2d3d12465b Mon Sep 17 00:00:00 2001 From: Vagif Date: Wed, 6 Feb 2019 15:46:49 +0900 Subject: [PATCH 016/351] Fix misleading comment in layer normalization Comment states that moments are calculated across the last dimension, however this is not true for convolutional layers, where the moments are calculated on all dimensions except the one with index 0. I changed the comment from "Calculate the moments on the last axis (layer activations)." to "By default, compute the moments across all the dimensions except the one with index 0." --- tensorflow/contrib/layers/python/layers/layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 403b522ce4..9d9524e4e4 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -2308,7 +2308,7 @@ def layer_norm(inputs, initializer=init_ops.ones_initializer(), collections=gamma_collections, trainable=trainable) - # Calculate the moments on the last axis (layer activations). + # By default, compute the moments across all the dimensions except the one with index 0. norm_axes = list(range(begin_norm_axis, inputs_rank)) mean, variance = nn.moments(inputs, norm_axes, keep_dims=True) # Compute layer normalization using the batch_normalization function. 
-- GitLab From f9b1bdcc9597fc5ec59d74c88c58cfaa9326b585 Mon Sep 17 00:00:00 2001 From: Dan Lazewatsky Date: Wed, 6 Feb 2019 11:27:57 -0500 Subject: [PATCH 017/351] Update docstring parser -> parses --- tensorflow/python/tools/freeze_graph.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py index 06a6e7dc08..ab82ee9fd4 100644 --- a/tensorflow/python/tools/freeze_graph.py +++ b/tensorflow/python/tools/freeze_graph.py @@ -240,7 +240,7 @@ def freeze_graph_with_def_protos(input_graph_def, def _parse_input_graph_proto(input_graph, input_binary): - """Parser input tensorflow graph into GraphDef proto.""" + """Parses input tensorflow graph into GraphDef proto.""" if not gfile.Exists(input_graph): print("Input graph file '" + input_graph + "' does not exist!") return -1 @@ -255,7 +255,7 @@ def _parse_input_graph_proto(input_graph, input_binary): def _parse_input_meta_graph_proto(input_graph, input_binary): - """Parser input tensorflow graph into MetaGraphDef proto.""" + """Parses input tensorflow graph into MetaGraphDef proto.""" if not gfile.Exists(input_graph): print("Input meta graph file '" + input_graph + "' does not exist!") return -1 @@ -271,7 +271,7 @@ def _parse_input_meta_graph_proto(input_graph, input_binary): def _parse_input_saver_proto(input_saver, input_binary): - """Parser input tensorflow Saver into SaverDef proto.""" + """Parses input tensorflow Saver into SaverDef proto.""" if not gfile.Exists(input_saver): print("Input saver file '" + input_saver + "' does not exist!") return -1 -- GitLab From 2dd2a69ad948fea43139fd6d76a2c7c06c027415 Mon Sep 17 00:00:00 2001 From: Siju Date: Thu, 7 Feb 2019 08:23:32 +0530 Subject: [PATCH 018/351] Added mobilenetV2 in keras application test --- tensorflow/python/keras/applications/applications_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/tensorflow/python/keras/applications/applications_test.py b/tensorflow/python/keras/applications/applications_test.py index b15ca5990a..9bf41c5219 100644 --- a/tensorflow/python/keras/applications/applications_test.py +++ b/tensorflow/python/keras/applications/applications_test.py @@ -32,8 +32,7 @@ MODEL_LIST = [ (applications.InceptionV3, 2048), (applications.InceptionResNetV2, 1536), (applications.MobileNet, 1024), - # TODO(fchollet): enable MobileNetV2 tests when a new TensorFlow test image - # is released with keras_applications upgraded to 1.0.5 or above. + (applications.MobileNetV2, 1280), (applications.DenseNet121, 1024), (applications.DenseNet169, 1664), (applications.DenseNet201, 1920), -- GitLab From 72539914200b45ecad344d9c632b87c89ee8052b Mon Sep 17 00:00:00 2001 From: Amit <30853054+amitsrivastava78@users.noreply.github.com> Date: Fri, 8 Feb 2019 17:54:50 +0530 Subject: [PATCH 019/351] Updated import_tensorflow.cc Fixed the type errors --- tensorflow/lite/toco/import_tensorflow.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/toco/import_tensorflow.cc b/tensorflow/lite/toco/import_tensorflow.cc index 813e439995..17408a3c37 100644 --- a/tensorflow/lite/toco/import_tensorflow.cc +++ b/tensorflow/lite/toco/import_tensorflow.cc @@ -1346,7 +1346,7 @@ tensorflow::Status ConvertUnsupportedOperator( } // Parse outputs. Name them after the node's name, plus an ordinal suffix. - // Note that some outputs are to be multipled by a named attribute. + // Note that some outputs are to be multiplied by a named attribute. const tensorflow::OpDef* op_def = nullptr; if (tensorflow::OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) { GetOutputNamesFromNodeDef(node, *op_def, op); @@ -1480,7 +1480,7 @@ tensorflow::Status ConvertPlaceholderOperator( if (node.attr().count("shape")) { const auto& shape = GetShapeAttr(node, "shape"); auto num_dims = shape.dim_size(); - // TODO(b/62716978): This logic needs to be revisted. 
During dims + // TODO(b/62716978): This logic needs to be revisited. During dims // refactoring it is an interim fix. if (num_dims > 0 && !HasWildcardDimension(shape)) { auto& dst_array_dims = *array.mutable_shape()->mutable_dims(); -- GitLab From 8f7a2ca8ad651eccf5e07ddf077238dbc5e0dd66 Mon Sep 17 00:00:00 2001 From: P Sudeepam Date: Sat, 9 Feb 2019 22:47:40 +0530 Subject: [PATCH 020/351] modified the documentation of glorot_normal initializer to specify that the 'stddev' argument refers to the standard deviation of the truncated normal distribution --- tensorflow/python/ops/init_ops.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index a4cebc8d58..ba373872ce 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -1263,9 +1263,10 @@ class GlorotNormal(VarianceScaling): """The Glorot normal initializer, also called Xavier normal initializer. It draws samples from a truncated normal distribution centered on 0 - with `stddev = sqrt(2 / (fan_in + fan_out))` - where `fan_in` is the number of input units in the weight tensor - and `fan_out` is the number of output units in the weight tensor. + with standard deviation (after truncation) given by + `stddev = sqrt(2 / (fan_in + fan_out))` where `fan_in` is the number + of input units in the weight tensor and `fan_out` is the number of + output units in the weight tensor. Args: seed: A Python integer. Used to create random seeds. See -- GitLab From 76927b8f455d5af153370d17e77b2caca5d3bdd3 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Tue, 29 Jan 2019 18:05:23 -0800 Subject: [PATCH 021/351] Add CheckInputsWeights method to validate number of inputs and whether they must be weights. 
--- .../compiler/tf2tensorrt/convert/convert_nodes.cc | 11 +++++++++++ .../compiler/tf2tensorrt/convert/convert_nodes.h | 3 +++ 2 files changed, 14 insertions(+) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index ce8a69b0d3..7654539f69 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2583,6 +2583,17 @@ Status ConvertQuantize(OpConverterParams* params) { {"input_max", true}, {"num_bits", true}})); } + if (node_def.op() == "FakeQuantWithMinMaxArgs") { + TF_RETURN_IF_ERROR(CheckInputsWeights(params, {{"input", false}})); + } else if (node_def.op() == "FakeQuantWithMinMaxVars" || + node_def.op() == "QuantizeAndDequantizeV2") { + TF_RETURN_IF_ERROR(CheckInputsWeights( + params, {{"input", false}, {"min", true}, {"max", true}})); + } else if (node_def.op() == "QuantizeAndDequantizeV3") { + TF_RETURN_IF_ERROR(CheckInputsWeights( + params, + {{"input", false}, {"min", true}, {"max", true}, {"num_bits", true}})); + } float min_range = 0.0f; float max_range = 0.0f; if (node_def.op() == "FakeQuantWithMinMaxArgs") { diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index d1e30eb848..9b5ad51aa1 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -521,6 +521,9 @@ class Converter { // Tensors/weights added during construction of trt_network_. std::unordered_map trt_tensors_; + // Input constants which have been converted to tensors via IConstLayer + std::unordered_map trt_tensors_from_constants_; + // Special op converter for custom plugins. 
OpConverter plugin_converter_; -- GitLab From 303813f4c283bc62d44bbf2a22a0e521b24229ed Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Wed, 30 Jan 2019 16:48:13 -0800 Subject: [PATCH 022/351] Add Slice and unify StridedSlice and Slice Calculate output dimensions' Finish implementing fix typo Current unit tests now passing for StridedSlice. negative stride stuff Not supporting negative strides for now. Fix some bugs and clean up --- .../tf2tensorrt/convert/convert_nodes.cc | 279 +++++++++----- .../tf2tensorrt/convert/convert_nodes.h | 6 + .../tf2tensorrt/convert/convert_nodes_test.cc | 349 +++++++++++------- 3 files changed, 408 insertions(+), 226 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 7654539f69..564236f5c0 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -392,6 +392,15 @@ inline bool DimsEqual(const nvinfer1::Dims& dim_l, return true; } +bool AllLengthsEqual(const std::vector>& inputs) { + if (inputs.size() == 0) return true; + int length = inputs.at(0).size(); + for (int i = 1; i < inputs.size(); i++) { + if (inputs.at(i).size() != length) return false; + } + return true; +} + inline nvinfer1::Dims GetTrtDimsForTensor(const tensorflow::Tensor& tensor) { nvinfer1::Dims dims; dims.nbDims = tensor.dims(); @@ -2151,100 +2160,80 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) { return tensorflow::Status::OK(); } -// Gets the bounds (start or end) from the weights of a StridedSlice op. -tensorflow::Status GetStridedSliceBound(const std::vector& input_dims, - const TRT_ShapedWeights& bound_weights, - int mask, bool begin, string node_name, - std::vector* output_bound) { - const string bound_name = (begin) ? 
"begin" : "end"; - const int* weights_ptr = static_cast(bound_weights.GetValues()); - *output_bound = - std::vector(weights_ptr, weights_ptr + bound_weights.count()); - if (output_bound->size() != input_dims.size()) { - return tensorflow::errors::InvalidArgument( - "StridedSlice \"", bound_name, "\" specified ", - std::to_string(output_bound->size()), " dimensions, but input rank is ", - std::to_string(input_dims.size()), ", at ", node_name); - } - for (int i = 0; i < output_bound->size(); i++) { - if ((1 << i) & mask) { - // Apply mask. - (*output_bound)[i] = (begin) ? 0 : input_dims[i]; - // Masked bound will always result in a valid, non-negative bound, so we - // don't need the following checks. For the common case of using masks on - // a undefined batch dim (-1), we specifically don't want to do the - // following checks because they will erroneously detect an out of range - // bound or try to correct the negative value. - continue; - } - // Make sure bound is valid. - if (((*output_bound)[i] < -input_dims[i]) || - ((*output_bound)[i] > input_dims[i])) { - return tensorflow::errors::InvalidArgument( - bound_name, " value of ", std::to_string((*output_bound)[i]), - " for StridedSlice is invalid, must be in the range " - "[-dim_size(i), dim_size(i)], at ", - node_name); - } - // Convert negative values to their positive equivalent. - if ((*output_bound)[i] < 0) { - (*output_bound)[i] += input_dims[i]; - } - } - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { - const auto& inputs = params->inputs; +tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params, + const TRT_TensorOrWeights& input, + const std::vector& begin, + const std::vector& size, + const std::vector& stride) { const auto& node_def = params->node_def; TF_RETURN_IF_ERROR(CheckInputsWeights( *params, {{"input", false}, {"begin", true}, {"end", true}, {"strides", true}})); // Get input dims. 
- nvinfer1::Dims dims = inputs.at(0).GetTrtDims(); + nvinfer1::Dims dims = input.GetTrtDims(); std::vector input_dims(dims.d, dims.d + dims.nbDims); - if (inputs.at(0).is_tensor()) { - // Temporarily add batch dimension so that indexes line up properly. - input_dims.insert(input_dims.begin(), inputs.at(0).batch_size()); - } - if (input_dims.size() > 4) { + // Temporarily add batch dimension so that indexes line up properly. + input_dims.insert(input_dims.begin(), input.batch_size()); + // Check that batch dimension is not going to be modified. + if (begin[0] != 0 || size[0] != input_dims[0] || stride[0] != 1) { return tensorflow::errors::Unimplemented( - "StridedSlice is not implemented for tensors with rank > 4, at ", + "TensorRT does not allow modifications to the batch dimension, at ", node_def.name()); } - TFAttrs attrs(node_def); - // Get begin and end bounds per axis. - std::vector begin, end; - TF_RETURN_IF_ERROR(GetStridedSliceBound(input_dims, inputs.at(1).weights(), - attrs.get("begin_mask"), true, - node_def.name(), &begin)); - TF_RETURN_IF_ERROR(GetStridedSliceBound(input_dims, inputs.at(2).weights(), - attrs.get("end_mask"), false, - node_def.name(), &end)); - // Get strides per axis (must all be 1). - TRT_ShapedWeights stride_weights = inputs.at(3).weights(); - const int* stride_weights_ptr = static_cast(stride_weights.GetValues()); - std::vector strides(stride_weights_ptr, - stride_weights_ptr + stride_weights.count()); - for (int x : strides) { - if (x != 1) { - return tensorflow::errors::Unimplemented( - "StridedSlice is only implemented for stride of 1, at ", + // Check bounds. 
+ for (int i = 1; i < input_dims.size(); i++) { + if (begin[i] < 0 || begin[i] > input_dims[i]) { + return tensorflow::errors::InvalidArgument( + "\"begin\" for dimension ", std::to_string(i), " in ", node_def.op(), + " must be in the range [0, dims(i)), at ", node_def.name()); + } + const int end = begin[i] + size[i]; + if (end < 0 || end > input_dims[i]) { + return tensorflow::errors::InvalidArgument( + "\"begin\" + \"size\" for dimension ", std::to_string(i), " in ", + node_def.op(), " nmust be in the range [0, dims(i)], at ", + node_def.name()); + } + if (size[i] <= 0) { + return tensorflow::errors::InvalidArgument( + "\"size\" cannot be negative or zero for ", node_def.op(), ", at ", node_def.name()); } } - // Unsupported mask options. - for (const string& attr : - {"ellipsis_mask", "new_axis_mask", "shrink_axis_mask"}) { - int attr_val = attrs.get(attr); - if (attr_val != 0) { +// TRT 5.1 adds a slice layer. For older versions, we attempt to use the +// padding layer with negative padding. +#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) + // Use ISliceLayer. + nvinfer1::Dims begin_dims, size_dims, stride_dims; + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(begin, &begin_dims, true)); + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(size, &size_dims, true)); + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(stride, &stride_dims, true)); + if (params->validation_only) return Status::OK(); + + nvinfer1::ISliceLayer* layer = params->converter->network()->addSlice( + *const_cast(input.tensor()), begin_dims, size_dims, + stride_dims); + params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0))); + return tensorflow::Status::OK(); +#else + // Use IPaddingLayer. + // Strides must be 1 in this case. 
+ for (int x : stride) { + if (x != 1) { return tensorflow::errors::Unimplemented( - attr, " is not supported for StridedSlice, at ", node_def.name()); + "Strides other than 1 are not supported with this version of TRT, " + "at ", + node_def.name()); } } - - nvinfer1::ITensor* tensor = - const_cast(inputs.at(0).tensor()); + // Rank must be 2, 3 or 4. + if (input_dims.size() > 4) { + return tensorflow::errors::Unimplemented(node_def.op(), + " for tensors with rank > 4 is " + "not supported in this version of " + "TRT, at ", + node_def.name()); + } // Reshape if necessary to 4-D, since IPaddingLayer requires a 4-D input. const bool need_reshape = (input_dims.size() != 4); int reshape_dims_added = 0; @@ -2254,23 +2243,16 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { while (input_dims.size() < 4) { input_dims.insert(input_dims.begin() + 1, 1); begin.insert(begin.begin() + 1, 0); - end.insert(end.begin() + 1, 1); + size.insert(size.begin() + 1, 1); reshape_dims_added++; } - TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &reshape_dims, - /*ignore_first_dim=*/true)); + TF_RETURN_IF_ERROR( + TensorShapeArrayToTrtDims(input_dims, &reshape_dims, true)); } // Find dimensions which need to be sliced. 
std::vector pad_dims; - for (int i = 0; i < input_dims.size(); i++) { - if ((begin[i] != 0) || (end[i] != input_dims[i])) { - if (i == 0) { - return tensorflow::errors::Unimplemented( - "StridedSlice can't modify batch dim, at ", node_def.name()); - } else if ((end[i] - begin[i]) < 0) { - return tensorflow::errors::InvalidArgument( - "New size of sliced dimension is negative, at ", node_def.name()); - } + for (int i = 1; i < input_dims.size(); i++) { + if ((begin[i] != 0) || (begin[i] + size[i] != input_dims[i])) { pad_dims.push_back(i); } } @@ -2291,16 +2273,19 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { } } else if (pad_dims.size() > 2) { return tensorflow::errors::Unimplemented( - "StridedSlice can only modify 2 dimensions, at ", node_def.name()); + node_def.op(), + " can only modify up to 2 dimensions in this version of TRT, at ", + node_def.name()); } std::sort(pad_dims.begin(), pad_dims.end()); // Convert to pre/post padding values. Since TRT does not have a StridedSlice - // or Slice layer, we instead create an IPaddingLayer with negative padding. + // or Slice layer prior to 5.1, we instead create an IPaddingLayer with + // negative padding. nvinfer1::DimsHW pre_padding, post_padding; for (int i = 0; i < pad_dims.size(); i++) { const int axis = pad_dims[i]; pre_padding.d[i] = -begin[axis]; - post_padding.d[i] = end[axis] - input_dims[axis]; + post_padding.d[i] = (begin[axis] + size[axis]) - input_dims[axis]; } // IPaddingLayer will always apply the padding to dims 2,3 (input format is @@ -2320,10 +2305,11 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { if (params->validation_only) return Status::OK(); // Start conversion. 
+ nvinfer1::ITensor* tensor = const_cast(input.tensor()); if (need_reshape) { const nvinfer1::ITensor* output_tensor = nullptr; TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape( - inputs.at(0), reshape_dims, &output_tensor)); + input, reshape_dims, &output_tensor)); tensor = const_cast(output_tensor); } if (need_transpose) { @@ -2332,7 +2318,6 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { tensor, transpose_order, &output_tensor)); tensor = const_cast(output_tensor); } - // Add padding layer nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding( *const_cast(tensor), pre_padding, post_padding); @@ -2340,7 +2325,6 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { params->converter->MarkQuantizationRangesAsInferrable(tensor, layer->getOutput(0)); tensor = layer->getOutput(0); - // Restore transpose if (need_transpose) { const nvinfer1::ITensor* output_tensor = nullptr; @@ -2353,7 +2337,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { // Calculate output dimensions for (int i = 0; i < pad_dims.size(); i++) { const int axis = pad_dims[i]; - input_dims[axis] = end[axis] - begin[axis]; + input_dims[axis] = size[axis]; } // Remove added 1 dimensions for (int i = 0; i < reshape_dims_added; i++) { @@ -2366,8 +2350,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { } nvinfer1::Dims new_dims; - TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims, - /*ignore_first_dim=*/true)); + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims, true)); const nvinfer1::ITensor* output_tensor = nullptr; TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape( TRT_TensorOrWeights(tensor), new_dims, &output_tensor)); @@ -2377,6 +2360,102 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { params->outputs->push_back( TRT_TensorOrWeights(const_cast(tensor))); return tensorflow::Status::OK(); +#endif +} + +tensorflow::Status 
ConvertSlice(OpConverterParams* params) { + const auto& inputs = params->inputs; + const auto& node_def = params->node_def; + TF_RETURN_IF_ERROR(CheckInputsWeights( + params, {{"input", false}, {"begin", true}, {"size", true}})); + std::vector begin, size; + inputs.at(1).weights().ToVector(&begin); + inputs.at(2).weights().ToVector(&size); + // Get input dims. + nvinfer1::Dims dims = inputs.at(0).GetTrtDims(); + std::vector input_dims(dims.d, dims.d + dims.nbDims); + // Add batch dimension so that indexes line up properly. + input_dims.insert(input_dims.begin(), inputs.at(0).batch_size()); + if (!AllLengthsEqual({input_dims, begin, size})) { + return tensorflow::errors::InvalidArgument( + "Length of begin and size arguments must equal rank of input for " + "Slice, at ", node_def.name()); + } + // Size of -1 signifies to take all remaining elements. + for (int i = 0; i < input_dims.size(); i++) { + if (size[i] == -1) { + size[i] = input_dims[i] - begin[i]; + } + } + // Stride is 1 for all dims. + std::vector stride(begin.size(), 1); + return ConvertStridedSliceHelper(params, inputs.at(0), begin, size, stride); +} + +tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { + const auto& inputs = params->inputs; + const auto& node_def = params->node_def; + TF_RETURN_IF_ERROR(CheckInputsWeights( + params, + {{"input", false}, {"begin", true}, {"end", true}, {"strides", true}})); + // Get input dims. + nvinfer1::Dims dims = inputs.at(0).GetTrtDims(); + std::vector input_dims(dims.d, dims.d + dims.nbDims); + // Add batch dimension so that indexes line up properly. + input_dims.insert(input_dims.begin(), inputs.at(0).batch_size()); + // Get begin and end bounds per axis. 
+ TFAttrs attrs(node_def); + std::vector begin, end, stride; + inputs.at(1).weights().ToVector(&begin); + inputs.at(2).weights().ToVector(&end); + inputs.at(3).weights().ToVector(&stride); + if (!AllLengthsEqual({input_dims, begin, end, stride})) { + return tensorflow::errors::InvalidArgument( + "Length of begin, end, and stride arguments must equal rank of input " + "for StridedSlice, at ", node_def.name()); + } + // Unsupported mask options. + for (const string& attr : + {"ellipsis_mask", "new_axis_mask", "shrink_axis_mask"}) { + int attr_val = attrs.get(attr); + if (attr_val != 0) { + return tensorflow::errors::Unimplemented( + attr, " is not supported for StridedSlice, at ", node_def.name()); + } + } + // Standarize begin and end bounds by applying masks, making negative values + // positive, and correcting out of bounds ranges (StridedSlice does this + // silently). + for (int i = 0; i < input_dims.size(); i++) { + // Begin + if ((1 << i) & attrs.get("begin_mask")) { + begin[i] = 0; + } else if (begin[i] < 0) { + begin[i] += input_dims[i]; + } + begin[i] = std::max(0, std::min(begin[i], input_dims[i])); + // End + if ((1 << i) & attrs.get("end_mask")) { + end[i] = input_dims[i]; + } else if (end[i] < 0) { + end[i] += input_dims[i]; + } + end[i] = std::max(0, std::min(end[i], input_dims[i])); + } + // Negative strides currently not supported. 
+ for (int i = 0; i < input_dims.size(); i++) { + if (stride[i] < 0) { + return tensorflow::errors::Unimplemented( + "Negative strides are not supported for StridedSlice, at ", node_def.op()); + } + } + // TRT Slice layer uses (begin, size) instead of (begin, end) + std::vector size(input_dims.size()); + for (int i = 0; i < input_dims.size(); i++) { + // Divide by stride (round up) + size[i] = (end[i] - begin[i] + stride[i] - 1) / stride[i]; + } + return ConvertStridedSliceHelper(params, inputs.at(0), begin, size, stride); } tensorflow::Status ConvertConv2D(OpConverterParams* params) { diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index 9b5ad51aa1..6e97025612 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -190,6 +190,12 @@ class TRT_ShapedWeights { string DebugString() const; + template + void ToVector(std::vector* output) const { + const T* weights_ptr = static_cast(const_cast(GetValues())); + *output = std::vector(weights_ptr, weights_ptr + count()); + } + // TODO(aaroey): make these private. nvinfer1::Dims shape_; // Note: shape.type[] is not used. tensorflow::DataType type_; diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index bb1341ada3..beaf3b1c6c 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -2606,46 +2606,37 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { AddTestWeights("strides", {4}, {1, 1, 1, 1}); RunValidationAndConversion( node_def, error::UNIMPLEMENTED, - "StridedSlice can't modify batch dim, at my_strided_slice"); + "TensorRT does not allow modifications to the batch dimension, at " + "my_strided_slice"); } { - // Stride is not 1, should fail. 
+ // Modify batch dim with dynamic batch size, should fail. Reset(); NodeDef node_def = get_strided_slice_nodedef(); - AddTestTensor("input", {1, 2, 3}); + AddTestTensor("input", {1, 2, 3}, -1); AddTestWeights("begin", {4}, {0, 0, 0, 0}); - AddTestWeights("end", {4}, {1, 1, 2, 3}); - AddTestWeights("strides", {4}, {1, 2, -1, 3}); - RunValidationAndConversion(node_def, error::UNIMPLEMENTED, - "StridedSlice is only implemented for stride of " - "1, at my_strided_slice"); - } - { - // Begin out of bounds, should fail. - Reset(); - NodeDef node_def = get_strided_slice_nodedef(); - AddTestTensor("input", {1, 2, 3}); - AddTestWeights("begin", {4}, {1, 2, 3, 4}); AddTestWeights("end", {4}, {0, 1, 2, 3}); AddTestWeights("strides", {4}, {1, 1, 1, 1}); RunValidationAndConversion( - node_def, error::INVALID_ARGUMENT, - "begin value of 2 for StridedSlice is invalid, must be in the range " - "[-dim_size(i), dim_size(i)], at my_strided_slice"); + node_def, error::UNIMPLEMENTED, + "TensorRT does not allow modifications to the batch dimension, at " + "my_strided_slice"); } +// TRT 5.1+ supports strides +#if NV_TENSORRT_MAJOR < 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR < 1) { - // End out of bounds, should fail. + // Stride is not 1, should fail. Reset(); NodeDef node_def = get_strided_slice_nodedef(); AddTestTensor("input", {1, 2, 3}); AddTestWeights("begin", {4}, {0, 0, 0, 0}); - AddTestWeights("end", {4}, {1, 2, 3, 4}); - AddTestWeights("strides", {4}, {1, 1, 1, 1}); - RunValidationAndConversion( - node_def, error::INVALID_ARGUMENT, - "end value of 2 for StridedSlice is invalid, must be in the range " - "[-dim_size(i), dim_size(i)], at my_strided_slice"); + AddTestWeights("end", {4}, {1, 1, 2, 3}); + AddTestWeights("strides", {4}, {1, 2, 1, 3}); + RunValidationAndConversion(node_def, error::UNIMPLEMENTED, + "StridedSlice is only implemented for stride of " + "1, at my_strided_slice"); } +#endif { // Size of sliced dim is negative, should fail. 
Reset(); @@ -2656,16 +2647,17 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { AddTestWeights("strides", {4}, {1, 1, 1, 1}); RunValidationAndConversion( node_def, error::INVALID_ARGUMENT, - "New size of sliced dimension is negative, at my_strided_slice"); + "\"size\" cannot be negative or zero for StridedSlice, at my_strided_slice"); } struct TestParams { std::vector input_dims; - std::vector expected_output_dims; std::vector begin; std::vector end; + std::vector strides; int begin_mask; int end_mask; + std::vector expected_output_dims; std::vector expected_output; }; @@ -2678,104 +2670,148 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { }; // Ok. +#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) + const int kStridedSliceOKCases = 22; +#else const int kStridedSliceOKCases = 18; +#endif TestParams ok_params[kStridedSliceOKCases] = { - // 2D Crop. - TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2}, - /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 0, 1, 2}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 1, 0, 0}), - /*expected_output=*/{1, 2}}, - TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2}, - /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 0, 0, 0}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 1, 1, 1}), - /*expected_output=*/{5, 6}}, - TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 2}, - /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 1, 2, 3}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 1, 0, 0}), - /*expected_output=*/{5, 6}}, - // 2D Crop, with transpose. 
- TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 2, 1}, - /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 2, 1}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 0, 0, 0}), - /*expected_output=*/{1, 2}}, - TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 2, 1}, - /*begin=*/{0, 1, 1, 0}, /*end=*/{0, 2, 3, 1}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 0, 0, 0}), - /*expected_output=*/{5, 6}}, - TestParams{/*input_dims=*/{2, 1, 3}, /*expected_output_dims=*/{1, 1, 2}, - /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 1, 2}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 0, 0, 0}), - /*expected_output=*/{1, 2}}, - TestParams{/*input_dims=*/{2, 1, 3}, /*expected_output_dims=*/{1, 1, 2}, - /*begin=*/{0, 1, 0, 1}, /*end=*/{0, 2, 1, 3}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 0, 0, 0}), - /*expected_output=*/{5, 6}}, - // 2D Crop, with reshape. - TestParams{/*input_dims=*/{2, 3}, /*expected_output_dims=*/{1, 2}, - /*begin=*/{0, 0, 0}, /*end=*/{0, 1, 2}, - /*begin_mask=*/get_mask({0, 0, 0}), - /*end_mask=*/get_mask({1, 0, 0}), - /*expected_output=*/{1, 2}}, - TestParams{/*input_dims=*/{2, 3}, /*expected_output_dims=*/{1, 2}, - /*begin=*/{0, 1, 1}, /*end=*/{0, 0, 0}, - /*begin_mask=*/get_mask({0, 0, 0}), - /*end_mask=*/get_mask({1, 1, 1}), - /*expected_output=*/{5, 6}}, - // 1D Crop. - TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 2, 2}, - /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 0, 0, 2}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 1, 1, 0}), - /*expected_output=*/{1, 2, 4, 5}}, - TestParams{/*input_dims=*/{1, 2, 3}, /*expected_output_dims=*/{1, 1, 3}, - /*begin=*/{0, 0, 1, 0}, /*end=*/{0, 0, 0, 0}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 1, 1, 1}), - /*expected_output=*/{4, 5, 6}}, - // 1D Crop, with transpose. 
- TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 3, 1}, - /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 0, 0}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 0, 1, 1}), - /*expected_output=*/{1, 2, 3}}, - TestParams{/*input_dims=*/{2, 3, 1}, /*expected_output_dims=*/{1, 3, 1}, - /*begin=*/{0, 1, 0, 0}, /*end=*/{0, 0, 0, 0}, - /*begin_mask=*/get_mask({0, 0, 0, 0}), - /*end_mask=*/get_mask({1, 1, 1, 1}), - /*expected_output=*/{4, 5, 6}}, - // 1D Crop, with reshape. - TestParams{/*input_dims=*/{6}, /*expected_output_dims=*/{3}, - /*begin=*/{0, 0}, /*end=*/{0, 3}, - /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}), - /*expected_output=*/{1, 2, 3}}, - TestParams{/*input_dims=*/{1, 6}, /*expected_output_dims=*/{1, 3}, - /*begin=*/{0, 0, 2}, /*end=*/{0, 0, 5}, - /*begin_mask=*/get_mask({0, 0, 0}), - /*end_mask=*/get_mask({1, 1, 0}), - /*expected_output=*/{3, 4, 5}}, - TestParams{/*input_dims=*/{6, 1}, /*expected_output_dims=*/{3, 1}, - /*begin=*/{0, 2, 0}, /*end=*/{0, 5, 0}, - /*begin_mask=*/get_mask({0, 0, 0}), - /*end_mask=*/get_mask({1, 0, 1}), - /*expected_output=*/{3, 4, 5}}, - // Negative axis. - TestParams{/*input_dims=*/{6, 1}, /*expected_output_dims=*/{3, 1}, - /*begin=*/{0, -6, 0}, /*end=*/{0, -3, 0}, - /*begin_mask=*/get_mask({0, 0, 0}), - /*end_mask=*/get_mask({1, 0, 1}), - /*expected_output=*/{1, 2, 3}}, - TestParams{/*input_dims=*/{6, 1}, /*expected_output_dims=*/{5, 1}, - /*begin=*/{0, 0, 0}, /*end=*/{0, -1, 0}, - /*begin_mask=*/get_mask({0, 0, 0}), - /*end_mask=*/get_mask({1, 0, 1}), - /*expected_output=*/{1, 2, 3, 4, 5}}, + // 2D Crop. 
+ TestParams{/*input_dims=*/{1, 2, 3}, /*begin=*/{0, 0, 0, 0}, + /*end=*/{0, 0, 1, 2}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 1, 0, 0}), + /*expected_output_dims=*/{1, 1, 2}, /*expected_output=*/{1, 2}}, + TestParams{ + /*input_dims=*/{1, 2, 3}, + /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 0, 0, 0}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 1, 1, 1}), /*expected_output_dims=*/{1, 1, 2}, + /*expected_output=*/{5, 6}}, + TestParams{ + /*input_dims=*/{1, 2, 3}, + /*begin=*/{0, 0, 1, 1}, /*end=*/{0, 1, 2, 3}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 1, 0, 0}), /*expected_output_dims=*/{1, 1, 2}, + /*expected_output=*/{5, 6}}, + // 2D Crop, with transpose. + TestParams{ + /*input_dims=*/{2, 3, 1}, + /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 2, 1}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 0, 0, 0}), /*expected_output_dims=*/{1, 2, 1}, + /*expected_output=*/{1, 2}}, + TestParams{ + /*input_dims=*/{2, 3, 1}, + /*begin=*/{0, 1, 1, 0}, /*end=*/{0, 2, 3, 1}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 0, 0, 0}), /*expected_output_dims=*/{1, 2, 1}, + /*expected_output=*/{5, 6}}, + TestParams{ + /*input_dims=*/{2, 1, 3}, + /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 1, 2}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 0, 0, 0}), /*expected_output_dims=*/{1, 1, 2}, + /*expected_output=*/{1, 2}}, + TestParams{ + /*input_dims=*/{2, 1, 3}, + /*begin=*/{0, 1, 0, 1}, /*end=*/{0, 2, 1, 3}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 0, 0, 0}), /*expected_output_dims=*/{1, 1, 2}, + /*expected_output=*/{5, 6}}, + // 2D Crop, with reshape. 
+ TestParams{/*input_dims=*/{2, 3}, + /*begin=*/{0, 0, 0}, /*end=*/{0, 1, 2}, /*strides=*/{1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0}), + /*end_mask=*/get_mask({1, 0, 0}), + /*expected_output_dims=*/{1, 2}, + /*expected_output=*/{1, 2}}, + TestParams{/*input_dims=*/{2, 3}, + /*begin=*/{0, 1, 1}, /*end=*/{0, 0, 0}, /*strides=*/{1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0}), + /*end_mask=*/get_mask({1, 1, 1}), + /*expected_output_dims=*/{1, 2}, + /*expected_output=*/{5, 6}}, + // 1D Crop. + TestParams{ + /*input_dims=*/{1, 2, 3}, + /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 0, 0, 2}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 1, 1, 0}), /*expected_output_dims=*/{1, 2, 2}, + /*expected_output=*/{1, 2, 4, 5}}, + TestParams{ + /*input_dims=*/{1, 2, 3}, + /*begin=*/{0, 0, 1, 0}, /*end=*/{0, 0, 0, 0}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 1, 1, 1}), /*expected_output_dims=*/{1, 1, 3}, + /*expected_output=*/{4, 5, 6}}, + // 1D Crop, with transpose. + TestParams{ + /*input_dims=*/{2, 3, 1}, + /*begin=*/{0, 0, 0, 0}, /*end=*/{0, 1, 0, 0}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 0, 1, 1}), /*expected_output_dims=*/{1, 3, 1}, + /*expected_output=*/{1, 2, 3}}, + TestParams{ + /*input_dims=*/{2, 3, 1}, + /*begin=*/{0, 1, 0, 0}, /*end=*/{0, 0, 0, 0}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 1, 1, 1}), /*expected_output_dims=*/{1, 3, 1}, + /*expected_output=*/{4, 5, 6}}, + // 1D Crop, with reshape. 
+ TestParams{/*input_dims=*/{6}, + /*begin=*/{0, 0}, /*end=*/{0, 3}, /*strides=*/{1, 1}, + /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}), + /*expected_output_dims=*/{3}, + /*expected_output=*/{1, 2, 3}}, + TestParams{/*input_dims=*/{1, 6}, + /*begin=*/{0, 0, 2}, /*end=*/{0, 0, 5}, /*strides=*/{1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0}), + /*end_mask=*/get_mask({1, 1, 0}), + /*expected_output_dims=*/{1, 3}, + /*expected_output=*/{3, 4, 5}}, + TestParams{/*input_dims=*/{6, 1}, + /*begin=*/{0, 2, 0}, /*end=*/{0, 5, 0}, /*strides=*/{1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0}), + /*end_mask=*/get_mask({1, 0, 1}), + /*expected_output_dims=*/{3, 1}, + /*expected_output=*/{3, 4, 5}}, + // Negative axis. + TestParams{/*input_dims=*/{6, 1}, + /*begin=*/{0, -6, 0}, /*end=*/{0, -3, 0}, /*strides=*/{1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0}), + /*end_mask=*/get_mask({1, 0, 1}), + /*expected_output_dims=*/{3, 1}, + /*expected_output=*/{1, 2, 3}}, + TestParams{/*input_dims=*/{6, 1}, + /*begin=*/{0, 0, 0}, /*end=*/{0, -1, 0}, /*strides=*/{1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0}), + /*end_mask=*/get_mask({1, 0, 1}), + /*expected_output_dims=*/{5, 1}, + /*expected_output=*/{1, 2, 3, 4, 5}}, +#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) + // Strides + TestParams{/*input_dims=*/{6}, + /*begin=*/{0, 0}, /*end=*/{0, 5}, /*strides=*/{1, 2}, + /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}), + /*expected_output_dims=*/{3}, + /*expected_output=*/{1, 3, 5}}, + TestParams{/*input_dims=*/{6}, + /*begin=*/{0, 0}, /*end=*/{0, 6}, /*strides=*/{1, 2}, + /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}), + /*expected_output_dims=*/{3}, + /*expected_output=*/{1, 3, 5}}, + TestParams{/*input_dims=*/{6}, + /*begin=*/{0, 1}, /*end=*/{0, 6}, /*strides=*/{1, 2}, + /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}), + /*expected_output_dims=*/{3}, + /*expected_output=*/{2, 4, 6}}, + 
TestParams{/*input_dims=*/{6}, + /*begin=*/{0, 2}, /*end=*/{0, 6}, /*strides=*/{1, 3}, + /*begin_mask=*/get_mask({0, 0}), /*end_mask=*/get_mask({1, 0}), + /*expected_output_dims=*/{2}, + /*expected_output=*/{3, 6}}, +#endif }; for (int i = 0; i < kStridedSliceOKCases; i++) { @@ -2788,9 +2824,8 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { ok_params[i].begin); AddTestWeights("end", {static_cast(ok_params[i].end.size())}, ok_params[i].end); - std::vector strides(ok_params[i].input_dims.size(), 1); - AddTestWeights("strides", {static_cast(strides.size())}, - strides); + AddTestWeights("strides", {static_cast(ok_params[i].strides.size())}, + ok_params[i].strides); RunValidationAndConversion(node_def); TRT_TensorOrWeights output; @@ -2807,6 +2842,68 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { } } +TEST_F(OpConverterTest, ConvertSlice) { + // Get nodedef for Slice layer. + auto get_slice_nodedef = + []() -> NodeDef { + Scope s = Scope::NewRootScope(); + auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); + auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32); + auto size = ops::Placeholder(s.WithOpName("size"), DT_INT32); + auto slice = ops::Slice(s.WithOpName("my_slice"), input, begin, size); + return slice.operation.node()->def(); + }; + + { + // Begin is below bounds, should fail. + Reset(); + NodeDef node_def = get_slice_nodedef(); + AddTestTensor("input", {1, 2, 3}); + AddTestWeights("begin", {4}, {0, 0, -1, 0}); + AddTestWeights("size", {4}, {1, 1, 2, 3}); + RunValidationAndConversion( + node_def, error::UNIMPLEMENTED, + "ellipsis_mask is not supported for StridedSlice, at " + "my_strided_slice"); + } + { + // Begin is above bounds, should fail. 
+ Reset(); + NodeDef node_def = get_slice_nodedef(); + AddTestTensor("input", {1, 2, 3}); + AddTestWeights("begin", {4}, {0, 0, 2, 0}); + AddTestWeights("size", {4}, {1, 1, 2, 3}); + RunValidationAndConversion( + node_def, error::UNIMPLEMENTED, + "ellipsis_mask is not supported for StridedSlice, at " + "my_strided_slice"); + } + { + // Size is below bounds, should fail. + Reset(); + NodeDef node_def = get_slice_nodedef(); + AddTestTensor("input", {1, 2, 3}); + AddTestWeights("begin", {4}, {0, 0, 0, 0}); + AddTestWeights("size", {4}, {1, 1, -2, 3}); + RunValidationAndConversion( + node_def, error::UNIMPLEMENTED, + "ellipsis_mask is not supported for StridedSlice, at " + "my_strided_slice"); + } + { + // Size is above bounds, should fail. + Reset(); + NodeDef node_def = get_slice_nodedef(); + AddTestTensor("input", {1, 2, 3}); + AddTestWeights("begin", {4}, {0, 0, 0, 0}); + AddTestWeights("size", {4}, {1, 1, 3, 3}); + RunValidationAndConversion( + node_def, error::UNIMPLEMENTED, + "ellipsis_mask is not supported for StridedSlice, at " + "my_strided_slice"); + } +} + TEST_F(OpConverterTest, ConvertConv2D) { { // Input list is empty, should fail. -- GitLab From aa554072ea49d000ecb76b9525b6caaf2c37bc9d Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Tue, 5 Feb 2019 10:56:54 -0800 Subject: [PATCH 023/351] Remove accidental change --- tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index 6e97025612..3987fa0b4d 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -527,9 +527,6 @@ class Converter { // Tensors/weights added during construction of trt_network_. 
std::unordered_map trt_tensors_; - // Input constants which have been converted to tensors via IConstLayer - std::unordered_map trt_tensors_from_constants_; - // Special op converter for custom plugins. OpConverter plugin_converter_; -- GitLab From ff601bd00379197dfabaf714890d06c0ec6cd9fb Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Tue, 5 Feb 2019 10:57:10 -0800 Subject: [PATCH 024/351] Formatting --- .../tf2tensorrt/convert/convert_nodes.cc | 9 ++++++--- .../tf2tensorrt/convert/convert_nodes_test.cc | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 564236f5c0..4374e5edcd 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2379,7 +2379,8 @@ tensorflow::Status ConvertSlice(OpConverterParams* params) { if (!AllLengthsEqual({input_dims, begin, size})) { return tensorflow::errors::InvalidArgument( "Length of begin and size arguments must equal rank of input for " - "Slice, at ", node_def.name()); + "Slice, at ", + node_def.name()); } // Size of -1 signifies to take all remaining elements. for (int i = 0; i < input_dims.size(); i++) { @@ -2412,7 +2413,8 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { if (!AllLengthsEqual({input_dims, begin, end, stride})) { return tensorflow::errors::InvalidArgument( "Length of begin, end, and stride arguments must equal rank of input " - "for StridedSlice, at ", node_def.name()); + "for StridedSlice, at ", + node_def.name()); } // Unsupported mask options. 
for (const string& attr : @@ -2446,7 +2448,8 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { for (int i = 0; i < input_dims.size(); i++) { if (stride[i] < 0) { return tensorflow::errors::Unimplemented( - "Negative strides are not supported for StridedSlice, at ", node_def.op()); + "Negative strides are not supported for StridedSlice, at ", + node_def.op()); } } // TRT Slice layer uses (begin, size) instead of (begin, end) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index beaf3b1c6c..7fef026480 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -2537,9 +2537,9 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { } // Get nodedef for StridedSlice layer. - auto get_strided_slice_nodedef = - [](int begin_mask = 0, int end_mask = 0, int ellipsis_mask = 0, - int new_axis_mask = 0, int shrink_axis_mask = 0) -> NodeDef { + auto get_strided_slice_nodedef = []( + int begin_mask = 0, int end_mask = 0, int ellipsis_mask = 0, + int new_axis_mask = 0, int shrink_axis_mask = 0) -> NodeDef { Scope s = Scope::NewRootScope(); auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32); @@ -2645,9 +2645,9 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { AddTestWeights("begin", {4}, {0, 0, 2, 0}); AddTestWeights("end", {4}, {1, 1, 0, 3}); AddTestWeights("strides", {4}, {1, 1, 1, 1}); - RunValidationAndConversion( - node_def, error::INVALID_ARGUMENT, - "\"size\" cannot be negative or zero for StridedSlice, at my_strided_slice"); + RunValidationAndConversion(node_def, error::INVALID_ARGUMENT, + "\"size\" cannot be negative or zero for " + "StridedSlice, at my_strided_slice"); } struct TestParams { @@ -2675,6 +2675,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { #else const int kStridedSliceOKCases = 18; #endif 
+ // Ok. TestParams ok_params[kStridedSliceOKCases] = { // 2D Crop. TestParams{/*input_dims=*/{1, 2, 3}, /*begin=*/{0, 0, 0, 0}, @@ -2824,7 +2825,8 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { ok_params[i].begin); AddTestWeights("end", {static_cast(ok_params[i].end.size())}, ok_params[i].end); - AddTestWeights("strides", {static_cast(ok_params[i].strides.size())}, + AddTestWeights("strides", + {static_cast(ok_params[i].strides.size())}, ok_params[i].strides); RunValidationAndConversion(node_def); @@ -2844,8 +2846,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { TEST_F(OpConverterTest, ConvertSlice) { // Get nodedef for Slice layer. - auto get_slice_nodedef = - []() -> NodeDef { + auto get_slice_nodedef = []() -> NodeDef { Scope s = Scope::NewRootScope(); auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT); auto begin = ops::Placeholder(s.WithOpName("begin"), DT_INT32); -- GitLab From 4cfa8bb3f71dd45f99dfdc315def61894e236d0b Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Tue, 5 Feb 2019 11:10:23 -0800 Subject: [PATCH 025/351] Fix merge errors --- .../tf2tensorrt/convert/convert_graph.cc | 1 + .../tf2tensorrt/convert/convert_nodes.cc | 16 +++------------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc index d829895239..d7441e6420 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc @@ -128,6 +128,7 @@ Status TrtCandidateSelector::IsTensorRTCandidate(const tensorflow::Node* node) { "Rsqrt", "Rsqrt", "Sigmoid", + "Slice", "Snapshot", "Softmax", "Sqrt", diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 4374e5edcd..350e37c427 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc 
@@ -2367,7 +2367,7 @@ tensorflow::Status ConvertSlice(OpConverterParams* params) { const auto& inputs = params->inputs; const auto& node_def = params->node_def; TF_RETURN_IF_ERROR(CheckInputsWeights( - params, {{"input", false}, {"begin", true}, {"size", true}})); + *params, {{"input", false}, {"begin", true}, {"size", true}})); std::vector begin, size; inputs.at(1).weights().ToVector(&begin); inputs.at(2).weights().ToVector(&size); @@ -2397,7 +2397,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { const auto& inputs = params->inputs; const auto& node_def = params->node_def; TF_RETURN_IF_ERROR(CheckInputsWeights( - params, + *params, {{"input", false}, {"begin", true}, {"end", true}, {"strides", true}})); // Get input dims. nvinfer1::Dims dims = inputs.at(0).GetTrtDims(); @@ -2665,17 +2665,6 @@ Status ConvertQuantize(OpConverterParams* params) { {"input_max", true}, {"num_bits", true}})); } - if (node_def.op() == "FakeQuantWithMinMaxArgs") { - TF_RETURN_IF_ERROR(CheckInputsWeights(params, {{"input", false}})); - } else if (node_def.op() == "FakeQuantWithMinMaxVars" || - node_def.op() == "QuantizeAndDequantizeV2") { - TF_RETURN_IF_ERROR(CheckInputsWeights( - params, {{"input", false}, {"min", true}, {"max", true}})); - } else if (node_def.op() == "QuantizeAndDequantizeV3") { - TF_RETURN_IF_ERROR(CheckInputsWeights( - params, - {{"input", false}, {"min", true}, {"max", true}, {"num_bits", true}})); - } float min_range = 0.0f; float max_range = 0.0f; if (node_def.op() == "FakeQuantWithMinMaxArgs") { @@ -3741,6 +3730,7 @@ static void RegisterValidatableOpConverters( (*registration)["Pad"] = ConvertPad; (*registration)["Relu6"] = ConvertRelu6; (*registration)["Reshape"] = ConvertReshape; + (*registration)["Slice"] = ConvertSlice; (*registration)["Square"] = ConvertSquare; (*registration)["Squeeze"] = ConvertSqueeze; (*registration)["StridedSlice"] = ConvertStridedSlice; -- GitLab From daaa70ef36cd0ab9a22ae50dcbc128c0905c7774 Mon Sep 17 00:00:00 
2001 From: Trevor Morris Date: Tue, 5 Feb 2019 14:46:22 -0800 Subject: [PATCH 026/351] Fix more merge errors. Add Ok tests for slice --- .../tf2tensorrt/convert/convert_nodes.cc | 7 +- .../tf2tensorrt/convert/convert_nodes_test.cc | 84 +++++++++++++++---- 2 files changed, 72 insertions(+), 19 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 350e37c427..24c2e2afe2 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2166,9 +2166,6 @@ tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params, const std::vector& size, const std::vector& stride) { const auto& node_def = params->node_def; - TF_RETURN_IF_ERROR(CheckInputsWeights( - *params, - {{"input", false}, {"begin", true}, {"end", true}, {"strides", true}})); // Get input dims. nvinfer1::Dims dims = input.GetTrtDims(); std::vector input_dims(dims.d, dims.d + dims.nbDims); @@ -2185,13 +2182,13 @@ tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params, if (begin[i] < 0 || begin[i] > input_dims[i]) { return tensorflow::errors::InvalidArgument( "\"begin\" for dimension ", std::to_string(i), " in ", node_def.op(), - " must be in the range [0, dims(i)), at ", node_def.name()); + " is out of range, at ", node_def.name()); } const int end = begin[i] + size[i]; if (end < 0 || end > input_dims[i]) { return tensorflow::errors::InvalidArgument( "\"begin\" + \"size\" for dimension ", std::to_string(i), " in ", - node_def.op(), " nmust be in the range [0, dims(i)], at ", + node_def.op(), " is out of range, at ", node_def.name()); } if (size[i] <= 0) { diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 7fef026480..8d74494edf 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ 
b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -2832,6 +2832,9 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { TRT_TensorOrWeights output; TF_EXPECT_OK(GetTensorOrWeights("my_strided_slice", &output)); + EXPECT_TRUE(output.is_tensor()); + ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims, + output.tensor()->getDimensions()); const DataVec input_data{ {"input", test::AsTensor({1, 2, 3, 4, 5, 6})}}; @@ -2863,21 +2866,19 @@ TEST_F(OpConverterTest, ConvertSlice) { AddTestWeights("begin", {4}, {0, 0, -1, 0}); AddTestWeights("size", {4}, {1, 1, 2, 3}); RunValidationAndConversion( - node_def, error::UNIMPLEMENTED, - "ellipsis_mask is not supported for StridedSlice, at " - "my_strided_slice"); + node_def, error::INVALID_ARGUMENT, + "\"begin\" for dimension 2 in Slice is out of range, at my_slice"); } { // Begin is above bounds, should fail. Reset(); NodeDef node_def = get_slice_nodedef(); AddTestTensor("input", {1, 2, 3}); - AddTestWeights("begin", {4}, {0, 0, 2, 0}); + AddTestWeights("begin", {4}, {0, 0, 3, 0}); AddTestWeights("size", {4}, {1, 1, 2, 3}); RunValidationAndConversion( - node_def, error::UNIMPLEMENTED, - "ellipsis_mask is not supported for StridedSlice, at " - "my_strided_slice"); + node_def, error::INVALID_ARGUMENT, + "\"begin\" for dimension 2 in Slice is out of range, at my_slice"); } { // Size is below bounds, should fail. @@ -2885,11 +2886,11 @@ TEST_F(OpConverterTest, ConvertSlice) { NodeDef node_def = get_slice_nodedef(); AddTestTensor("input", {1, 2, 3}); AddTestWeights("begin", {4}, {0, 0, 0, 0}); - AddTestWeights("size", {4}, {1, 1, -2, 3}); + AddTestWeights("size", {4}, {1, 1, 2, -2}); RunValidationAndConversion( - node_def, error::UNIMPLEMENTED, - "ellipsis_mask is not supported for StridedSlice, at " - "my_strided_slice"); + node_def, error::INVALID_ARGUMENT, + "\"begin\" + \"size\" for dimension 3 in Slice is out of range, at " + "my_slice"); } { // Size is above bounds, should fail. 
@@ -2899,9 +2900,64 @@ TEST_F(OpConverterTest, ConvertSlice) { AddTestWeights("begin", {4}, {0, 0, 0, 0}); AddTestWeights("size", {4}, {1, 1, 3, 3}); RunValidationAndConversion( - node_def, error::UNIMPLEMENTED, - "ellipsis_mask is not supported for StridedSlice, at " - "my_strided_slice"); + node_def, error::INVALID_ARGUMENT, + "\"begin\" + \"size\" for dimension 2 in Slice is out of range, at " + "my_slice"); + } + + struct TestParams { + TestParams(const std::vector& input_dims, + const std::vector& begin, const std::vector& size, + const std::vector& expected_output_dims, + const std::vector& expected_output) + : input_dims(input_dims), + begin(begin), + size(size), + expected_output_dims(expected_output_dims), + expected_output(expected_output) {} + + std::vector input_dims; + std::vector begin; + std::vector size; + std::vector expected_output_dims; + std::vector expected_output; + }; + + // Ok. + const int kSliceOKCases = 5; + TestParams ok_params[kSliceOKCases] = { + TestParams{{1, 2, 3}, {0, 0, 0, 0}, {-1, -1, -1, -1}, {1, 2, 3}, {1, 2, 3, 4, 5, 6}}, + TestParams{{1, 2, 3}, {0, 0, 0, 0}, {1, 1, 2, 3}, {1, 2, 3}, {1, 2, 3, 4, 5, 6}}, + TestParams{{1, 2, 3}, {0, 0, 0, 0}, {1, -1, 2, 2}, {1, 2, 2}, {1, 2, 4, 5}}, + TestParams{{6}, {0, 1}, {1, 5}, {5}, {2, 3, 4, 5, 6}}, + TestParams{{6}, {0, 1}, {-1, 3}, {3}, {2, 3, 4}}, + }; + + for (int i = 0; i < kSliceOKCases; i++) { + Reset(); + NodeDef node_def = get_slice_nodedef(); + AddTestTensor("input", ok_params[i].input_dims); + AddTestWeights("begin", + {static_cast(ok_params[i].begin.size())}, + ok_params[i].begin); + AddTestWeights("size", {static_cast(ok_params[i].size.size())}, + ok_params[i].size); + RunValidationAndConversion(node_def); + + TRT_TensorOrWeights output; + TF_EXPECT_OK(GetTensorOrWeights("my_slice", &output)); + EXPECT_TRUE(output.is_tensor()); + ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims, + output.tensor()->getDimensions()); + + const DataVec input_data{ + {"input", 
test::AsTensor({1, 2, 3, 4, 5, 6})}}; + DataVec output_data{ + {"my_slice", + ConstructTensor(ok_params[i].expected_output.size())}}; + BuildAndRun(input_data, &output_data); + EXPECT_THAT(GetSpanForData(output_data[0]), + ElementsAreArray(ok_params[i].expected_output)); } } -- GitLab From c810f3ec91eb7961f51ae76b6415717ecb76b453 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Mon, 11 Feb 2019 15:25:14 -0800 Subject: [PATCH 027/351] Improve batch manipulation catching. Use GetSpan --- .../tf2tensorrt/convert/convert_nodes.cc | 89 ++++++++++++------- .../tf2tensorrt/convert/convert_nodes.h | 5 +- .../tf2tensorrt/convert/convert_nodes_test.cc | 2 +- 3 files changed, 62 insertions(+), 34 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 24c2e2afe2..40d05a422e 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2162,21 +2162,15 @@ tensorflow::Status ConvertSqueeze(OpConverterParams* params) { tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params, const TRT_TensorOrWeights& input, - const std::vector& begin, - const std::vector& size, + std::vector begin, + std::vector size, const std::vector& stride) { const auto& node_def = params->node_def; // Get input dims. nvinfer1::Dims dims = input.GetTrtDims(); std::vector input_dims(dims.d, dims.d + dims.nbDims); // Temporarily add batch dimension so that indexes line up properly. - input_dims.insert(input_dims.begin(), input.batch_size()); - // Check that batch dimension is not going to be modified. - if (begin[0] != 0 || size[0] != input_dims[0] || stride[0] != 1) { - return tensorflow::errors::Unimplemented( - "TensorRT does not allow modifications to the batch dimension, at ", - node_def.name()); - } + input_dims.insert(input_dims.begin(), -1); // Check bounds. 
for (int i = 1; i < input_dims.size(); i++) { if (begin[i] < 0 || begin[i] > input_dims[i]) { @@ -2202,9 +2196,12 @@ tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params, #if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) // Use ISliceLayer. nvinfer1::Dims begin_dims, size_dims, stride_dims; - TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(begin, &begin_dims, true)); - TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(size, &size_dims, true)); - TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(stride, &stride_dims, true)); + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(begin, &begin_dims, + /*ignore_first_dim=*/true)); + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(size, &size_dims, + /*ignore_first_dim=*/true)); + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(stride, &stride_dims, + /*ignore_first_dim=*/true)); if (params->validation_only) return Status::OK(); nvinfer1::ISliceLayer* layer = params->converter->network()->addSlice( @@ -2243,8 +2240,8 @@ tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params, size.insert(size.begin() + 1, 1); reshape_dims_added++; } - TF_RETURN_IF_ERROR( - TensorShapeArrayToTrtDims(input_dims, &reshape_dims, true)); + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &reshape_dims, + /*ignore_first_dim=*/true)); } // Find dimensions which need to be sliced. 
std::vector pad_dims; @@ -2347,7 +2344,8 @@ tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params, } nvinfer1::Dims new_dims; - TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims, true)); + TF_RETURN_IF_ERROR(TensorShapeArrayToTrtDims(input_dims, &new_dims, + /*ignore_first_dim=*/true)); const nvinfer1::ITensor* output_tensor = nullptr; TF_RETURN_IF_ERROR(params->converter->PrepareTensorForShape( TRT_TensorOrWeights(tensor), new_dims, &output_tensor)); @@ -2365,9 +2363,12 @@ tensorflow::Status ConvertSlice(OpConverterParams* params) { const auto& node_def = params->node_def; TF_RETURN_IF_ERROR(CheckInputsWeights( *params, {{"input", false}, {"begin", true}, {"size", true}})); - std::vector begin, size; - inputs.at(1).weights().ToVector(&begin); - inputs.at(2).weights().ToVector(&size); + auto begin_span = inputs.at(1).weights().GetSpan(); + auto size_span = inputs.at(2).weights().GetSpan(); + std::vector begin(begin_span.data(), + begin_span.data() + begin_span.size()); + std::vector size(size_span.data(), + size_span.data() + size_span.size()); // Get input dims. nvinfer1::Dims dims = inputs.at(0).GetTrtDims(); std::vector input_dims(dims.d, dims.d + dims.nbDims); @@ -2379,8 +2380,18 @@ tensorflow::Status ConvertSlice(OpConverterParams* params) { "Slice, at ", node_def.name()); } + // Check that batch dimension is unmodified. If the batch size is -1 and the + // size is not -1, we do not convert the op since the batch dim could + // potentially be modified. + if ((size[0] != -1 && (input_dims[0] == -1 || + (input_dims[0] != -1 && size[0] != input_dims[0]))) || + begin[0] != 0) { + return tensorflow::errors::Unimplemented( + "TensorRT does not allow modifications to the batch dimension, at ", + node_def.name()); + } // Size of -1 signifies to take all remaining elements. 
- for (int i = 0; i < input_dims.size(); i++) { + for (int i = 1; i < input_dims.size(); i++) { if (size[i] == -1) { size[i] = input_dims[i] - begin[i]; } @@ -2402,11 +2413,15 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { // Add batch dimension so that indexes line up properly. input_dims.insert(input_dims.begin(), inputs.at(0).batch_size()); // Get begin and end bounds per axis. - TFAttrs attrs(node_def); - std::vector begin, end, stride; - inputs.at(1).weights().ToVector(&begin); - inputs.at(2).weights().ToVector(&end); - inputs.at(3).weights().ToVector(&stride); + auto begin_span = inputs.at(1).weights().GetSpan(); + auto end_span = inputs.at(2).weights().GetSpan(); + auto stride_span = inputs.at(3).weights().GetSpan(); + std::vector begin(begin_span.data(), + begin_span.data() + begin_span.size()); + std::vector end(end_span.data(), + end_span.data() + end_span.size()); + std::vector stride(stride_span.data(), + stride_span.data() + stride_span.size()); if (!AllLengthsEqual({input_dims, begin, end, stride})) { return tensorflow::errors::InvalidArgument( "Length of begin, end, and stride arguments must equal rank of input " @@ -2414,6 +2429,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { node_def.name()); } // Unsupported mask options. + TFAttrs attrs(node_def); for (const string& attr : {"ellipsis_mask", "new_axis_mask", "shrink_axis_mask"}) { int attr_val = attrs.get(attr); @@ -2422,30 +2438,43 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { attr, " is not supported for StridedSlice, at ", node_def.name()); } } + const int begin_mask = attrs.get("begin_mask"); + const int end_mask = attrs.get("end_mask"); + // Check that batch dimension is unmodified. If the batch size is -1 and the + // end mask is not set, we do not convert the op since the batch dim could + // potentially be modified. 
+ if ((!(begin_mask & 1) && begin[0] != 0) || stride[0] != 1 || + (!(end_mask & 1) && (input_dims[0] == -1 || + (input_dims[0] != -1 && end[0] != input_dims[0])))) { + return tensorflow::errors::Unimplemented( + "TensorRT does not allow modifications to the batch dimension, at ", + node_def.name()); + } // Standarize begin and end bounds by applying masks, making negative values // positive, and correcting out of bounds ranges (StridedSlice does this // silently). - for (int i = 0; i < input_dims.size(); i++) { + for (int i = 1; i < input_dims.size(); i++) { // Begin - if ((1 << i) & attrs.get("begin_mask")) { + if ((1 << i) & begin_mask) { begin[i] = 0; } else if (begin[i] < 0) { begin[i] += input_dims[i]; } begin[i] = std::max(0, std::min(begin[i], input_dims[i])); // End - if ((1 << i) & attrs.get("end_mask")) { + if ((1 << i) & end_mask) { end[i] = input_dims[i]; } else if (end[i] < 0) { end[i] += input_dims[i]; } end[i] = std::max(0, std::min(end[i], input_dims[i])); } - // Negative strides currently not supported. + // Negative or zero strides currently not supported. 
for (int i = 0; i < input_dims.size(); i++) { - if (stride[i] < 0) { + if (stride[i] <= 0) { return tensorflow::errors::Unimplemented( - "Negative strides are not supported for StridedSlice, at ", + "Negative or zero stride values are not supported for StridedSlice, " + "at ", node_def.op()); } } diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index 3987fa0b4d..cbba01ba57 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -191,9 +191,8 @@ class TRT_ShapedWeights { string DebugString() const; template - void ToVector(std::vector* output) const { - const T* weights_ptr = static_cast(const_cast(GetValues())); - *output = std::vector(weights_ptr, weights_ptr + count()); + absl::Span GetSpan() const { + return absl::Span(tensor_.flat().data(), count()); } // TODO(aaroey): make these private. diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 8d74494edf..f46c6063e9 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -2613,7 +2613,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { // Modify batch dim with dynamic batch size, should fail. 
Reset(); NodeDef node_def = get_strided_slice_nodedef(); - AddTestTensor("input", {1, 2, 3}, -1); + AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1); AddTestWeights("begin", {4}, {0, 0, 0, 0}); AddTestWeights("end", {4}, {0, 1, 2, 3}); AddTestWeights("strides", {4}, {1, 1, 1, 1}); -- GitLab From 4736c22b2e853d9c1d53ab346e2f997e0523ba2e Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Mon, 11 Feb 2019 15:53:56 -0800 Subject: [PATCH 028/351] Fix formatting --- .../tf2tensorrt/convert/convert_nodes.cc | 9 ++-- .../tf2tensorrt/convert/convert_nodes_test.cc | 41 ++++++++++++------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 40d05a422e..9bd7eb833a 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2182,8 +2182,7 @@ tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params, if (end < 0 || end > input_dims[i]) { return tensorflow::errors::InvalidArgument( "\"begin\" + \"size\" for dimension ", std::to_string(i), " in ", - node_def.op(), " is out of range, at ", - node_def.name()); + node_def.op(), " is out of range, at ", node_def.name()); } if (size[i] <= 0) { return tensorflow::errors::InvalidArgument( @@ -2367,8 +2366,7 @@ tensorflow::Status ConvertSlice(OpConverterParams* params) { auto size_span = inputs.at(2).weights().GetSpan(); std::vector begin(begin_span.data(), begin_span.data() + begin_span.size()); - std::vector size(size_span.data(), - size_span.data() + size_span.size()); + std::vector size(size_span.data(), size_span.data() + size_span.size()); // Get input dims. 
nvinfer1::Dims dims = inputs.at(0).GetTrtDims(); std::vector input_dims(dims.d, dims.d + dims.nbDims); @@ -2418,8 +2416,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { auto stride_span = inputs.at(3).weights().GetSpan(); std::vector begin(begin_span.data(), begin_span.data() + begin_span.size()); - std::vector end(end_span.data(), - end_span.data() + end_span.size()); + std::vector end(end_span.data(), end_span.data() + end_span.size()); std::vector stride(stride_span.data(), stride_span.data() + stride_span.size()); if (!AllLengthsEqual({input_dims, begin, end, stride})) { diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index f46c6063e9..d6588c4472 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -2658,7 +2658,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { int begin_mask; int end_mask; std::vector expected_output_dims; - std::vector expected_output; + std::vector expected_output; }; auto get_mask = [](const std::vector& mask) { @@ -2669,11 +2669,13 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { return result; }; - // Ok. + // Same input is used for all tests. + const std::vector ok_input = {1, 2, 3, 4, 5, 6}; + #if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) - const int kStridedSliceOKCases = 22; + const int kStridedSliceOKCases = 23; #else - const int kStridedSliceOKCases = 18; + const int kStridedSliceOKCases = 19; #endif // Ok. TestParams ok_params[kStridedSliceOKCases] = { @@ -2790,6 +2792,13 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { /*end_mask=*/get_mask({1, 0, 1}), /*expected_output_dims=*/{5, 1}, /*expected_output=*/{1, 2, 3, 4, 5}}, + // Clamp out of bounds begin and end. 
+ TestParams{/*input_dims=*/{1, 2, 3}, /*begin=*/{0, 0, -9999, -9}, + /*end=*/{0, 1, 1000, 4}, /*strides=*/{1, 1, 1, 1}, + /*begin_mask=*/get_mask({0, 0, 0, 0}), + /*end_mask=*/get_mask({1, 0, 0, 0}), + /*expected_output_dims=*/{1, 2, 3}, + /*expected_output=*/{1, 2, 3, 4, 5, 6}}, #if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) // Strides TestParams{/*input_dims=*/{6}, @@ -2836,8 +2845,7 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims, output.tensor()->getDimensions()); - const DataVec input_data{ - {"input", test::AsTensor({1, 2, 3, 4, 5, 6})}}; + const DataVec input_data{{"input", test::AsTensor(ok_input)}}; DataVec output_data{ {"my_strided_slice", ConstructTensor(ok_params[i].expected_output.size())}}; @@ -2926,11 +2934,17 @@ TEST_F(OpConverterTest, ConvertSlice) { // Ok. const int kSliceOKCases = 5; TestParams ok_params[kSliceOKCases] = { - TestParams{{1, 2, 3}, {0, 0, 0, 0}, {-1, -1, -1, -1}, {1, 2, 3}, {1, 2, 3, 4, 5, 6}}, - TestParams{{1, 2, 3}, {0, 0, 0, 0}, {1, 1, 2, 3}, {1, 2, 3}, {1, 2, 3, 4, 5, 6}}, - TestParams{{1, 2, 3}, {0, 0, 0, 0}, {1, -1, 2, 2}, {1, 2, 2}, {1, 2, 4, 5}}, - TestParams{{6}, {0, 1}, {1, 5}, {5}, {2, 3, 4, 5, 6}}, - TestParams{{6}, {0, 1}, {-1, 3}, {3}, {2, 3, 4}}, + TestParams{{1, 2, 3}, + {0, 0, 0, 0}, + {-1, -1, -1, -1}, + {1, 2, 3}, + {1, 2, 3, 4, 5, 6}}, + TestParams{ + {1, 2, 3}, {0, 0, 0, 0}, {1, 1, 2, 3}, {1, 2, 3}, {1, 2, 3, 4, 5, 6}}, + TestParams{ + {1, 2, 3}, {0, 0, 0, 0}, {1, -1, 2, 2}, {1, 2, 2}, {1, 2, 4, 5}}, + TestParams{{6}, {0, 1}, {1, 5}, {5}, {2, 3, 4, 5, 6}}, + TestParams{{6}, {0, 1}, {-1, 3}, {3}, {2, 3, 4}}, }; for (int i = 0; i < kSliceOKCases; i++) { @@ -2952,9 +2966,8 @@ TEST_F(OpConverterTest, ConvertSlice) { const DataVec input_data{ {"input", test::AsTensor({1, 2, 3, 4, 5, 6})}}; - DataVec output_data{ - {"my_slice", - ConstructTensor(ok_params[i].expected_output.size())}}; + DataVec output_data{{"my_slice", 
ConstructTensor( + ok_params[i].expected_output.size())}}; BuildAndRun(input_data, &output_data); EXPECT_THAT(GetSpanForData(output_data[0]), ElementsAreArray(ok_params[i].expected_output)); -- GitLab From 7fb1c7a9e50adb71c476e4d48ac0cc98ba0b50e0 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Mon, 11 Feb 2019 16:17:27 -0800 Subject: [PATCH 029/351] Fix TRT < 5.1 fallback. Add negative stride test case for TRT >= 5.1 --- .../tf2tensorrt/convert/convert_nodes.cc | 7 ++++--- .../tf2tensorrt/convert/convert_nodes_test.cc | 20 ++++++++++++++++--- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 9bd7eb833a..ebe8ba683d 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2250,10 +2250,11 @@ tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params, } } if (pad_dims.empty()) { - // No dimensions are changed. We could create a padding layer anyway with - // values of 0. + // No dimensions are changed. Create a no-op layer so tests don't break. 
if (params->validation_only) return Status::OK(); - params->outputs->push_back(inputs.at(0)); + nvinfer1::IShuffleLayer* layer = params->converter->network()->addShuffle( + *const_cast(input.tensor())); + params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0))); return tensorflow::Status::OK(); } else if (pad_dims.size() == 1) { // Only one dim is modified but we have to have 2, mark a second dim which diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index d6588c4472..628f52ffd2 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -2623,7 +2623,21 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { "my_strided_slice"); } // TRT 5.1+ supports strides -#if NV_TENSORRT_MAJOR < 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR < 1) +#if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) + { + // Negative strides, should fail. + Reset(); + NodeDef node_def = get_strided_slice_nodedef(); + AddTestTensor("input", {1, 2, 3}); + AddTestWeights("begin", {4}, {0, 0, 0, 0}); + AddTestWeights("end", {4}, {1, 1, 2, 3}); + AddTestWeights("strides", {4}, {1, 1, 1, -1}); + RunValidationAndConversion(node_def, error::UNIMPLEMENTED, + "Negative or zero stride values are not " + "supported for StridedSlice, at " + "my_strided_slice"); + } +#else { // Stride is not 1, should fail. 
Reset(); @@ -2633,8 +2647,8 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { AddTestWeights("end", {4}, {1, 1, 2, 3}); AddTestWeights("strides", {4}, {1, 2, 1, 3}); RunValidationAndConversion(node_def, error::UNIMPLEMENTED, - "StridedSlice is only implemented for stride of " - "1, at my_strided_slice"); + "Strides other than 1 are not supported with " + "this version of TRT, at my_strided_slice"); } #endif { -- GitLab From 95c72116f3d355ce868afac9917497e621b67284 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Mon, 11 Feb 2019 16:18:34 -0800 Subject: [PATCH 030/351] Fix error message --- tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index ebe8ba683d..3bc9a77c0b 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2473,7 +2473,7 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { return tensorflow::errors::Unimplemented( "Negative or zero stride values are not supported for StridedSlice, " "at ", - node_def.op()); + node_def.name()); } } // TRT Slice layer uses (begin, size) instead of (begin, end) -- GitLab From 615b7c9da929b32a06b9e416b751263c7f56ec77 Mon Sep 17 00:00:00 2001 From: Mahmoud Abuzaina Date: Tue, 12 Feb 2019 10:35:11 -0800 Subject: [PATCH 031/351] Enable quantization --- tensorflow/core/graph/mkl_layout_pass.cc | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index e934978e76..d5dcd16be7 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -291,10 +291,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.pad = "Pad"; csinfo_.pad_with_conv2d = "__MklDummyPadWithConv2D"; 
csinfo_.pad_with_fused_conv2d = "__MklDummyPadWithFusedConv2D"; -// Temporarily don't convert quantized operators into MKL versions for now. -// TODO(Intel-tf) Once all the relevant PRs have been merged then remove -// the ifdef. -#ifdef INTEL_MKL_QUANTIZED csinfo_.quantized_avg_pool = "QuantizedAvgPool"; csinfo_.quantized_concatv2 = "QuantizedConcatV2"; csinfo_.quantized_conv2d = "QuantizedConv2D"; @@ -316,14 +312,11 @@ class MklLayoutRewritePass : public GraphOptimizationPass { "QuantizedConv2DWithBiasSumAndReluAndRequantize"; csinfo_.quant_conv2d_with_bias_signed_sum_and_relu_and_requantize = "QuantizedConv2DWithBiasSignedSumAndReluAndRequantize"; -#endif csinfo_.relu = "Relu"; csinfo_.relu_grad = "ReluGrad"; csinfo_.relu6 = "Relu6"; csinfo_.relu6_grad = "Relu6Grad"; -#ifdef INTEL_MKL_QUANTIZED csinfo_.requantize = "Requantize"; -#endif csinfo_.tanh = "Tanh"; csinfo_.tanh_grad = "TanhGrad"; csinfo_.reshape = "Reshape"; @@ -443,7 +436,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.pad_with_fused_conv2d, csinfo_.mkl_pad_with_fused_conv2d, CopyAttrsPadWithFusedConv2D, AlwaysRewrite}); -#ifdef INTEL_MKL_QUANTIZED rinfo_.push_back({csinfo_.quantized_avg_pool, mkl_op_registry::GetMklOpName(csinfo_.quantized_avg_pool), CopyAttrsQuantizedPooling, AlwaysRewrite}); @@ -499,7 +491,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { mkl_op_registry::GetMklOpName( csinfo_.quant_conv2d_with_bias_signed_sum_and_relu_and_requantize), CopyAttrsQuantizedConv2D, AlwaysRewrite}); -#endif rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu), CopyAttrsDataType, AlwaysRewrite}); rinfo_.push_back({csinfo_.relu_grad, @@ -511,11 +502,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.relu6_grad, mkl_op_registry::GetMklOpName(csinfo_.relu6_grad), CopyAttrsDataType, AlwaysRewrite}); -#ifdef INTEL_MKL_QUANTIZED rinfo_.push_back({csinfo_.requantize, 
mkl_op_registry::GetMklOpName(csinfo_.requantize), CopyAttrsRequantize, AlwaysRewrite}); -#endif /* rinfo_.push_back({csinfo_.tanh, mkl_op_registry::GetMklOpName(csinfo_.tanh), @@ -3187,9 +3176,7 @@ Status MklLayoutRewritePass::RewriteNode(std::unique_ptr* g, // Set the Mkl layer label for this op. if (DataTypeIsQuantized(orig_node->input_type(0)) || DataTypeIsQuantized(orig_node->output_type(0))) { -#ifdef INTEL_MKL_QUANTIZED nb.Attr("_kernel", mkl_op_registry::kMklQuantizedOpLabel); -#endif } else { nb.Attr("_kernel", mkl_op_registry::kMklOpLabel); } @@ -3243,7 +3230,6 @@ Status MklLayoutRewritePass::RewriteNode(std::unique_ptr* g, // Current implementation reflects only QuantizedConv2D and its fused Ops. const MklLayoutRewritePass::RewriteInfo* MklLayoutRewritePass::CheckForQuantizedNodeRewrite(const Node* n) const { -#ifdef INTEL_MKL_QUANTIZED DataType Tinput, Tfilter; if (!(GetNodeAttr(n->def(), "Tinput", &Tinput).ok() && GetNodeAttr(n->def(), "Tfilter", &Tfilter).ok())) { @@ -3257,7 +3243,6 @@ MklLayoutRewritePass::CheckForQuantizedNodeRewrite(const Node* n) const { } } } -#endif return nullptr; } -- GitLab From 32b706a0739bf96223f50feafe346a8c7620c5ed Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Tue, 12 Feb 2019 12:48:09 -0800 Subject: [PATCH 032/351] Add CopyToVector. 
Clean up batch manip logic and add unit tests --- .../tf2tensorrt/convert/convert_nodes.cc | 52 ++++++++++--------- .../tf2tensorrt/convert/convert_nodes.h | 6 +++ .../tf2tensorrt/convert/convert_nodes_test.cc | 48 ++++++++++++++++- 3 files changed, 79 insertions(+), 27 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 3bc9a77c0b..17a9581bc3 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2363,11 +2363,8 @@ tensorflow::Status ConvertSlice(OpConverterParams* params) { const auto& node_def = params->node_def; TF_RETURN_IF_ERROR(CheckInputsWeights( *params, {{"input", false}, {"begin", true}, {"size", true}})); - auto begin_span = inputs.at(1).weights().GetSpan(); - auto size_span = inputs.at(2).weights().GetSpan(); - std::vector begin(begin_span.data(), - begin_span.data() + begin_span.size()); - std::vector size(size_span.data(), size_span.data() + size_span.size()); + std::vector begin = inputs.at(1).weights().CopyToVector(); + std::vector size = inputs.at(2).weights().CopyToVector(); // Get input dims. nvinfer1::Dims dims = inputs.at(0).GetTrtDims(); std::vector input_dims(dims.d, dims.d + dims.nbDims); @@ -2379,12 +2376,16 @@ tensorflow::Status ConvertSlice(OpConverterParams* params) { "Slice, at ", node_def.name()); } - // Check that batch dimension is unmodified. If the batch size is -1 and the - // size is not -1, we do not convert the op since the batch dim could - // potentially be modified. - if ((size[0] != -1 && (input_dims[0] == -1 || - (input_dims[0] != -1 && size[0] != input_dims[0]))) || - begin[0] != 0) { + // Check that batch dimension is unmodified. + const bool begin_is_modified = begin[0] != 0; + // If size[0]s is not -1, we can only know if the batch dimension is + // unmodified when the batch size is defined. 
When the batch size is + // undefined, we don't convert to be safe. + const bool batch_size_is_defined = input_dims[0] > 0; + const bool size_is_modified = + size[0] != -1 && (!batch_size_is_defined || + (batch_size_is_defined && size[0] != input_dims[0])); + if (begin_is_modified || size_is_modified) { return tensorflow::errors::Unimplemented( "TensorRT does not allow modifications to the batch dimension, at ", node_def.name()); @@ -2412,14 +2413,9 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { // Add batch dimension so that indexes line up properly. input_dims.insert(input_dims.begin(), inputs.at(0).batch_size()); // Get begin and end bounds per axis. - auto begin_span = inputs.at(1).weights().GetSpan(); - auto end_span = inputs.at(2).weights().GetSpan(); - auto stride_span = inputs.at(3).weights().GetSpan(); - std::vector begin(begin_span.data(), - begin_span.data() + begin_span.size()); - std::vector end(end_span.data(), end_span.data() + end_span.size()); - std::vector stride(stride_span.data(), - stride_span.data() + stride_span.size()); + std::vector begin = inputs.at(1).weights().CopyToVector(); + std::vector end = inputs.at(2).weights().CopyToVector(); + std::vector stride = inputs.at(3).weights().CopyToVector(); if (!AllLengthsEqual({input_dims, begin, end, stride})) { return tensorflow::errors::InvalidArgument( "Length of begin, end, and stride arguments must equal rank of input " @@ -2438,12 +2434,18 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { } const int begin_mask = attrs.get("begin_mask"); const int end_mask = attrs.get("end_mask"); - // Check that batch dimension is unmodified. If the batch size is -1 and the - // end mask is not set, we do not convert the op since the batch dim could - // potentially be modified. 
- if ((!(begin_mask & 1) && begin[0] != 0) || stride[0] != 1 || - (!(end_mask & 1) && (input_dims[0] == -1 || - (input_dims[0] != -1 && end[0] != input_dims[0])))) { + // Check that batch dimension is unmodified. + const bool begin_is_modified = !(begin_mask & 1) && begin[0] != 0; + const bool stride_is_modified = stride[0] != 1; + // If the batch size is -1 and the + // If end mask is not set, we can only know if the batch dimension is + // unmodified when the batch size is defined. When the batch size is + // undefined, we don't convert to be safe. + const bool batch_size_is_defined = input_dims[0] > 0; + const bool end_is_modified = + !(end_mask & 1) && (!batch_size_is_defined || + (batch_size_is_defined && end[0] != input_dims[0])); + if (begin_is_modified || stride_is_modified || end_is_modified) { return tensorflow::errors::Unimplemented( "TensorRT does not allow modifications to the batch dimension, at ", node_def.name()); diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index cbba01ba57..bcbad579f7 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -195,6 +195,12 @@ class TRT_ShapedWeights { return absl::Span(tensor_.flat().data(), count()); } + template + std::vector CopyToVector() const { + auto span = GetSpan(); + return std::vector(span.data(), span.data() + span.size()); + } + // TODO(aaroey): make these private. nvinfer1::Dims shape_; // Note: shape.type[] is not used. 
tensorflow::DataType type_; diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 628f52ffd2..43e537e3a1 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -2610,18 +2610,29 @@ TEST_F(OpConverterTest, ConvertStridedSlice) { "my_strided_slice"); } { - // Modify batch dim with dynamic batch size, should fail. + // Dynamic batch size without end_mask, should fail. Reset(); NodeDef node_def = get_strided_slice_nodedef(); AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1); AddTestWeights("begin", {4}, {0, 0, 0, 0}); - AddTestWeights("end", {4}, {0, 1, 2, 3}); + AddTestWeights("end", {4}, {1, 1, 2, 3}); AddTestWeights("strides", {4}, {1, 1, 1, 1}); RunValidationAndConversion( node_def, error::UNIMPLEMENTED, "TensorRT does not allow modifications to the batch dimension, at " "my_strided_slice"); } + { + // Dynamic batch size but using end_mask, ok. + Reset(); + NodeDef node_def = get_strided_slice_nodedef(/*begin_mask=*/0, + /*end_mask=*/1); + AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1); + AddTestWeights("begin", {4}, {0, 0, 0, 0}); + AddTestWeights("end", {4}, {0, 1, 2, 2}); + AddTestWeights("strides", {4}, {1, 1, 1, 1}); + RunValidationAndConversion(node_def); + } // TRT 5.1+ supports strides #if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1) { @@ -2926,6 +2937,39 @@ TEST_F(OpConverterTest, ConvertSlice) { "\"begin\" + \"size\" for dimension 2 in Slice is out of range, at " "my_slice"); } + { + // Modify batch dim, should fail. 
+ Reset(); + NodeDef node_def = get_slice_nodedef(); + AddTestTensor("input", {1, 2, 3}); + AddTestWeights("begin", {4}, {0, 0, 0, 0}); + AddTestWeights("size", {4}, {0, 1, 2, 3}); + RunValidationAndConversion( + node_def, error::UNIMPLEMENTED, + "TensorRT does not allow modifications to the batch dimension, at " + "my_slice"); + } + { + // Dynamic batch size with size[0] not -1, should fail. + Reset(); + NodeDef node_def = get_slice_nodedef(); + AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1); + AddTestWeights("begin", {4}, {0, 0, 0, 0}); + AddTestWeights("size", {4}, {1, 1, 2, 3}); + RunValidationAndConversion( + node_def, error::UNIMPLEMENTED, + "TensorRT does not allow modifications to the batch dimension, at " + "my_slice"); + } + { + // Dynamic batch size but using size[0] of -1, ok. + Reset(); + NodeDef node_def = get_slice_nodedef(); + AddTestTensor("input", {1, 2, 3}, /*batch_size=*/-1); + AddTestWeights("begin", {4}, {0, 0, 0, 0}); + AddTestWeights("size", {4}, {-1, 1, 2, 2}); + RunValidationAndConversion(node_def); + } struct TestParams { TestParams(const std::vector& input_dims, -- GitLab From 83bc3e976ea9fb118959ffd8d267b35a7edf25fe Mon Sep 17 00:00:00 2001 From: Jeffrey Poznanovic Date: Tue, 12 Feb 2019 07:24:55 -0800 Subject: [PATCH 033/351] Adding no_rocm tags to failing unit tests that were previously excluded via run_py3_core.sh --- tensorflow/python/BUILD | 1 + tensorflow/python/eager/BUILD | 2 ++ tensorflow/python/keras/BUILD | 8 +++++++- tensorflow/python/kernel_tests/BUILD | 8 ++++++++ tensorflow/python/kernel_tests/signal/BUILD | 11 +++++++++-- tensorflow/python/ops/parallel_for/BUILD | 1 + tensorflow/python/training/checkpointable/BUILD | 1 + tensorflow/tools/api/tests/BUILD | 1 + 8 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index a6d6868498..710841c675 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3644,6 +3644,7 @@ cuda_py_test( 
"//third_party/py/numpy", ], shard_count = 16, + tags = ["no_rocm"], ) cuda_py_test( diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index 47a7e9ae2f..a6a367e268 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -151,6 +151,7 @@ cuda_py_test( "//tensorflow/python:nn_grad", "//tensorflow/python:training", ], + tags = ["no_rocm"], ) cuda_py_test( @@ -530,6 +531,7 @@ tf_xla_py_test( srcs = ["def_function_xla_test.py"], tags = [ "no_pip", + "no_rocm", "nomac", ], deps = [ diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 4f710ff658..fcd01bba12 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -607,7 +607,10 @@ cuda_py_test( "//tensorflow/python:client_testlib", ], shard_count = 4, - tags = ["no_windows_gpu"], + tags = [ + "no_rocm", + "no_windows_gpu", + ], ) tf_py_test( @@ -778,6 +781,7 @@ cuda_py_test( "//tensorflow/python:client_testlib", ], shard_count = 8, + tags = ["no_rocm"], ) cuda_py_test( @@ -791,6 +795,7 @@ cuda_py_test( "//tensorflow/python:client_testlib", ], shard_count = 8, + tags = ["no_rocm"], ) tf_py_test( @@ -1181,6 +1186,7 @@ tf_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", ], + tags = ["no_rocm"], ) tf_py_test( diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 75a8fa2a68..5fb7357e10 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -131,6 +131,7 @@ cuda_py_test( "//tensorflow/python:client_testlib", ], grpc_enabled = True, + tags = ["no_rocm"], xla_enable_strict_auto_jit = True, ) @@ -1688,6 +1689,7 @@ cuda_py_test( "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:nn_ops", ], + tags = ["no_rocm"], xla_enable_strict_auto_jit = True, ) @@ -2605,6 +2607,7 @@ cuda_py_test( "//tensorflow/python/eager:context", ], flaky = 1, # create_local_cluster sometimes times out. 
+ tags = ["no_rocm"], xla_enable_strict_auto_jit = True, ) @@ -2764,6 +2767,7 @@ cuda_py_test( ], shard_count = 2, tags = [ + "no_rocm", "optonly", # flaky timeouts unless optimized ], xla_enable_strict_auto_jit = True, @@ -2805,6 +2809,7 @@ cuda_py_test( ], shard_count = 4, tags = [ + "no_rocm", "optonly", # times out ], xla_enable_strict_auto_jit = True, @@ -2868,6 +2873,7 @@ cuda_py_test( "//tensorflow/python:nn_grad", "//tensorflow/python:nn_ops", ], + tags = ["no_rocm"], xla_enable_strict_auto_jit = True, ) @@ -2887,6 +2893,7 @@ cuda_py_test( "//tensorflow/python:nn_ops_gen", ], shard_count = 4, + tags = ["no_rocm"], xla_enable_strict_auto_jit = True, ) @@ -3145,6 +3152,7 @@ cuda_py_test( "//tensorflow/python:nn_ops", ], shard_count = 30, + tags = ["no_rocm"], xla_enable_strict_auto_jit = True, ) diff --git a/tensorflow/python/kernel_tests/signal/BUILD b/tensorflow/python/kernel_tests/signal/BUILD index 554bf38029..29eb6ab674 100644 --- a/tensorflow/python/kernel_tests/signal/BUILD +++ b/tensorflow/python/kernel_tests/signal/BUILD @@ -29,6 +29,7 @@ cuda_py_tests( "//tensorflow/python:spectral_ops_test_util", "//tensorflow/python/ops/signal", ], + tags = ["no_rocm"], xla_enable_strict_auto_jit = True, ) @@ -45,7 +46,10 @@ cuda_py_tests( "//tensorflow/python/ops/signal", ], shard_count = 4, - tags = ["optonly"], + tags = [ + "no_rocm", + "optonly", + ], xla_enable_strict_auto_jit = True, ) @@ -130,7 +134,10 @@ cuda_py_tests( "//tensorflow/python:spectral_ops_test_util", "//tensorflow/python/ops/signal", ], - tags = ["nomac"], + tags = [ + "no_rocm", + "nomac", + ], xla_enable_strict_auto_jit = True, ) diff --git a/tensorflow/python/ops/parallel_for/BUILD b/tensorflow/python/ops/parallel_for/BUILD index 05d2e4c7fc..3c694c7640 100644 --- a/tensorflow/python/ops/parallel_for/BUILD +++ b/tensorflow/python/ops/parallel_for/BUILD @@ -115,6 +115,7 @@ cuda_py_test( "//tensorflow/python:random_ops", "//tensorflow/python:util", ], + tags = ["no_rocm"], ) cuda_py_test( 
diff --git a/tensorflow/python/training/checkpointable/BUILD b/tensorflow/python/training/checkpointable/BUILD index e1f58a9e4b..b7d28fee53 100644 --- a/tensorflow/python/training/checkpointable/BUILD +++ b/tensorflow/python/training/checkpointable/BUILD @@ -195,6 +195,7 @@ tf_xla_py_test( srcs = ["util_xla_test.py"], tags = [ "no_pip", + "no_rocm", "nomac", "notsan", # b/74395663 ], diff --git a/tensorflow/tools/api/tests/BUILD b/tensorflow/tools/api/tests/BUILD index 4efa4a9651..ba178af0f6 100644 --- a/tensorflow/tools/api/tests/BUILD +++ b/tensorflow/tools/api/tests/BUILD @@ -35,6 +35,7 @@ py_test( "//tensorflow/tools/common:public_api", "//tensorflow/tools/common:traverse", ], + tags = ["no_rocm"], ) tf_cc_binary( -- GitLab From 03baa8e8462aaf1211afca506b582eb35ae2fd7e Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Tue, 12 Feb 2019 17:02:26 -0800 Subject: [PATCH 034/351] [Intel MKL] Fix incorrect way to dump optimized graph This PR fixes issue 25674. It simply adds a check to ensure that a graph is valid before dumping it. 
--- .../core/common_runtime/optimization_registry.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/common_runtime/optimization_registry.cc b/tensorflow/core/common_runtime/optimization_registry.cc index 8120a2005a..e7db3aed27 100644 --- a/tensorflow/core/common_runtime/optimization_registry.cc +++ b/tensorflow/core/common_runtime/optimization_registry.cc @@ -41,11 +41,13 @@ Status OptimizationPassRegistry::RunGrouping( Status s = pass->Run(options); if (!s.ok()) return s; if (VLOG_IS_ON(1)) { - DumpGraphToFile( - strings::StrCat( - "after_phase_", phase.first, "_", pass->name(), "_", - reinterpret_cast((*options.graph).get())), - **options.graph); + if (options.graph) { + DumpGraphToFile( + strings::StrCat( + "after_phase_", phase.first, "_", pass->name(), "_", + reinterpret_cast((*options.graph).get())), + **options.graph); + } if (options.partition_graphs) { for (auto& part : *options.partition_graphs) { DumpGraphToFile( -- GitLab From 0299f62f368c973f078438c0758c39e6ac9a5717 Mon Sep 17 00:00:00 2001 From: ANSHUMAN TRIPATHY Date: Tue, 12 Feb 2019 11:47:06 +0530 Subject: [PATCH 035/351] Lite: Unpack Operator Negative Axis Support --- .../internal/reference/reference_ops.h | 10 +++- tensorflow/lite/kernels/unpack.cc | 12 +++-- tensorflow/lite/kernels/unpack_test.cc | 46 +++++++++++++++++-- 3 files changed, 58 insertions(+), 10 deletions(-) diff --git a/tensorflow/lite/kernels/internal/reference/reference_ops.h b/tensorflow/lite/kernels/internal/reference/reference_ops.h index 515db6fd37..6df6fdff82 100644 --- a/tensorflow/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/reference_ops.h @@ -1902,11 +1902,17 @@ void Unpack(const UnpackParams& params, const RuntimeShape& input_shape, const int outputs_count = params.num_split; int outer_size = 1; - for (int i = 0; i < params.axis; i++) { + int axis = params.axis; + if (axis < 0) { + axis += dimensions; + } + 
TFLITE_DCHECK_GE(axis, 0); + TFLITE_DCHECK_LT(axis, dimensions); + for (int i = 0; i < axis; ++i) { outer_size *= input_shape.Dims(i); } int copy_size = 1; - for (int i = params.axis + 1; i < dimensions; i++) { + for (int i = axis + 1; i < dimensions; ++i) { copy_size *= input_shape.Dims(i); } TFLITE_DCHECK_EQ(output_shape.FlatSize(), copy_size * outer_size); diff --git a/tensorflow/lite/kernels/unpack.cc b/tensorflow/lite/kernels/unpack.cc index 1caffe14f9..99ad4bb4e8 100644 --- a/tensorflow/lite/kernels/unpack.cc +++ b/tensorflow/lite/kernels/unpack.cc @@ -52,9 +52,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* input = GetInput(context, node, kInputTensor); TF_LITE_ENSURE(context, NumDimensions(input) <= 4); TF_LITE_ENSURE(context, NumDimensions(input) > 1); - TF_LITE_ENSURE(context, NumDimensions(input) > data->axis); - // TODO(renjieliu): Support negative axis. - TF_LITE_ENSURE(context, data->axis >= 0); + int axis = data->axis; + if (axis < 0) { + axis += NumDimensions(input); + } + TF_LITE_ENSURE(context, 0 <= axis && axis < NumDimensions(input)); if (input->type != kTfLiteInt32 && input->type != kTfLiteFloat32) { context->ReportError(context, "Currently pack only supports int32 and float32."); @@ -67,12 +69,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArray* output_shape = TfLiteIntArrayCreate(NumDimensions(input) - 1); int o = 0; for (int index = 0; index < NumDimensions(input); ++index) { - if (index != data->axis) { + if (index != axis) { output_shape->data[o++] = input_shape->data[index]; } } - TF_LITE_ENSURE_EQ(context, data->num, input_shape->data[data->axis]); + TF_LITE_ENSURE_EQ(context, data->num, input_shape->data[axis]); for (int i = 0; i < data->num; ++i) { TfLiteIntArray* copied_output_shape = TfLiteIntArrayCopy(output_shape); TfLiteTensor* output = GetOutput(context, node, i); diff --git a/tensorflow/lite/kernels/unpack_test.cc 
b/tensorflow/lite/kernels/unpack_test.cc index 9b60cce549..f62b3d8a1e 100644 --- a/tensorflow/lite/kernels/unpack_test.cc +++ b/tensorflow/lite/kernels/unpack_test.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include #include +#include #include "tensorflow/lite/interpreter.h" #include "tensorflow/lite/kernels/register.h" #include "tensorflow/lite/kernels/test_util.h" @@ -28,14 +28,16 @@ template class UnpackOpModel : public SingleOpModel { public: UnpackOpModel(const TensorData& input, int axis) { - CHECK_LE(axis, input.shape.size()); + if (axis < 0) { + axis += input.shape.size(); + } const int num_outputs = input.shape[axis]; input_ = AddInput(input); for (int i = 0; i < num_outputs; ++i) { outputs_.push_back(AddOutput(input.type)); } SetBuiltinOp(BuiltinOperator_UNPACK, BuiltinOptions_UnpackOptions, - CreatePackOptions(builder_, num_outputs, axis).Union()); + CreateUnpackOptions(builder_, num_outputs, axis).Union()); BuildInterpreter({GetShape(input_)}); } @@ -104,6 +106,44 @@ TEST(UnpackOpTest, FloatThreeOutputsAxisOne) { EXPECT_THAT(output_datas[1], ElementsAre(2, 4, 6)); } +TEST(UnpackOpTest, FloatThreeOutputsNegativeAxisOne) { + UnpackOpModel model({TensorType_FLOAT32, {3, 2}}, -1); + model.SetInput({1, 2, 3, 4, 5, 6}); + model.Invoke(); + + // Check outputs shapes. + const std::vector>& output_shapes = model.GetOutputShapes(); + EXPECT_EQ(output_shapes.size(), 2); + EXPECT_THAT(output_shapes[0], ElementsAre(3)); + EXPECT_THAT(output_shapes[1], ElementsAre(3)); + + // Check outputs values. 
+ const std::vector>& output_datas = model.GetOutputDatas(); + EXPECT_EQ(output_datas.size(), 2); + EXPECT_THAT(output_datas[0], ElementsAre(1, 3, 5)); + EXPECT_THAT(output_datas[1], ElementsAre(2, 4, 6)); +} + +TEST(UnpackOpTest, FloatThreeOutputsNegativeAxisTwo) { + UnpackOpModel model({TensorType_FLOAT32, {3, 2}}, -2); + model.SetInput({1, 2, 3, 4, 5, 6}); + model.Invoke(); + + // Check outputs shapes. + const std::vector>& output_shapes = model.GetOutputShapes(); + EXPECT_EQ(output_shapes.size(), 3); + EXPECT_THAT(output_shapes[0], ElementsAre(2)); + EXPECT_THAT(output_shapes[1], ElementsAre(2)); + EXPECT_THAT(output_shapes[2], ElementsAre(2)); + + // Check outputs values. + const std::vector>& output_datas = model.GetOutputDatas(); + EXPECT_EQ(output_datas.size(), 3); + EXPECT_THAT(output_datas[0], ElementsAre(1, 2)); + EXPECT_THAT(output_datas[1], ElementsAre(3, 4)); + EXPECT_THAT(output_datas[2], ElementsAre(5, 6)); +} + TEST(UnpackOpTest, FloatOneOutput) { UnpackOpModel model({TensorType_FLOAT32, {1, 6}}, 0); model.SetInput({1, 2, 3, 4, 5, 6}); -- GitLab From 5a72b588c60122aff774e168ad7841256694a0de Mon Sep 17 00:00:00 2001 From: Albin Joy Date: Wed, 13 Feb 2019 12:15:12 +0530 Subject: [PATCH 036/351] Removed redundant code from verifier.cc --- tensorflow/lite/tools/verifier.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tensorflow/lite/tools/verifier.cc b/tensorflow/lite/tools/verifier.cc index 99666ebc69..680e25a145 100644 --- a/tensorflow/lite/tools/verifier.cc +++ b/tensorflow/lite/tools/verifier.cc @@ -199,14 +199,6 @@ bool VerifySubGraphConsistency(const Model& model, const SubGraph& subgraph, variable_tensors, output_tensors; for (int i = 0; i < subgraph.tensors()->Length(); ++i) { const auto* tensor = subgraph.tensors()->Get(i); - bool is_constant_tensor = false; - if (model.buffers() && tensor->buffer() > 0 && - tensor->buffer() < model.buffers()->size()) { - auto* buffer = model.buffers()->Get(tensor->buffer()); - if (buffer && 
buffer->data()) { - is_constant_tensor = true; - } - } if (IsConstantTensor(*tensor, model)) { constant_tensors.insert(i); } else if (tensor->is_variable()) { -- GitLab From 54d344e6e58dfea50c1737316f1135fe0551720e Mon Sep 17 00:00:00 2001 From: Jeff Poznanovic Date: Wed, 13 Feb 2019 00:25:04 -0700 Subject: [PATCH 037/351] Add additional no_rocm tag for mfcc_ops_test --- tensorflow/python/kernel_tests/signal/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/kernel_tests/signal/BUILD b/tensorflow/python/kernel_tests/signal/BUILD index 29eb6ab674..4caecc85ca 100644 --- a/tensorflow/python/kernel_tests/signal/BUILD +++ b/tensorflow/python/kernel_tests/signal/BUILD @@ -77,6 +77,7 @@ cuda_py_tests( "//tensorflow/python/ops/signal", "//tensorflow/python:spectral_ops_test_util", ], + tags = ["no_rocm"], xla_enable_strict_auto_jit = True, ) -- GitLab From 59e215d19c5f7e65a866c00b902871d2bdf2bd61 Mon Sep 17 00:00:00 2001 From: Amit Srivastava Date: Wed, 13 Feb 2019 14:12:40 +0530 Subject: [PATCH 038/351] Removed the warning in optimized_ops.h file Removed compilation warning caused by the file --- .../lite/kernels/internal/optimized/optimized_ops.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index 5dc03e7d53..4dfc0b967b 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -2679,7 +2679,7 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params, // General fivefold pattern, with y4 > 1 so there is a non-broadcast inner // dimension. 
for (int i0 = 0; i0 < y0; ++i0) { - const uint8* input2_data_ptr; + const uint8* input2_data_ptr = nullptr; for (int i1 = 0; i1 < y1; ++i1) { input2_data_ptr = input2_data_reset; for (int i2 = 0; i2 < y2; ++i2) { @@ -2708,7 +2708,7 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params, // for y4 == 1 and the loop over y3 is contained within the // AddScalarBroadcast function. for (int i0 = 0; i0 < y0; ++i0) { - const uint8* input2_data_ptr; + const uint8* input2_data_ptr = nullptr; for (int i1 = 0; i1 < y1; ++i1) { input2_data_ptr = input2_data_reset; for (int i2 = 0; i2 < y2; ++i2) { @@ -3065,7 +3065,7 @@ inline void BroadcastMulFivefold(const ArithmeticParams& unswitched_params, int y4 = params.broadcast_shape[4]; if (y4 > 1) { for (int i0 = 0; i0 < y0; ++i0) { - const uint8* input2_data_ptr; + const uint8* input2_data_ptr = nullptr; for (int i1 = 0; i1 < y1; ++i1) { input2_data_ptr = input2_data_reset; for (int i2 = 0; i2 < y2; ++i2) { @@ -3082,7 +3082,7 @@ inline void BroadcastMulFivefold(const ArithmeticParams& unswitched_params, } } else { for (int i0 = 0; i0 < y0; ++i0) { - const uint8* input2_data_ptr; + const uint8* input2_data_ptr = nullptr; for (int i1 = 0; i1 < y1; ++i1) { input2_data_ptr = input2_data_reset; for (int i2 = 0; i2 < y2; ++i2) { -- GitLab From 312e34e318ee37a5e08af6afaef6a187ca33612e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20K=C3=A4ufl?= Date: Wed, 13 Feb 2019 11:32:51 +0100 Subject: [PATCH 039/351] Add Python 3.7 to classifiers Python 3.7 compatibility seems fixed in 1.13, see eg. tensorflow/tensorflow#20517 and tensorflow/tensorflow#17022. 
--- tensorflow/tools/pip_package/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index bbe3050513..f0fa8b50db 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -284,6 +284,7 @@ setup( 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Topic :: Scientific/Engineering', 'Topic :: Scientific/Engineering :: Mathematics', 'Topic :: Scientific/Engineering :: Artificial Intelligence', -- GitLab From 66fc32d73bb769d14d535c04345a2497983b7905 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 09:47:38 -0800 Subject: [PATCH 040/351] Update shape function for BroadcastTo, and add a unit test for it. PiperOrigin-RevId: 233772267 --- tensorflow/core/ops/array_ops.cc | 40 ++++++++++----------------- tensorflow/core/ops/array_ops_test.cc | 27 ++++++++++++++++++ tensorflow/core/ops/math_ops_test.cc | 4 +++ 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 108f187c98..8b6ee87079 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -466,47 +466,37 @@ REGISTER_OP("BroadcastTo") .Attr("T: type") .Attr("Tidx: {int32, int64} = DT_INT32") .SetShapeFn([](InferenceContext* c) { - ShapeHandle in = c->input(0); + ShapeHandle shape_in = c->input(1); + TF_RETURN_IF_ERROR(c->WithRank(shape_in, 1, &shape_in)); ShapeHandle out; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &out)); - if (!c->RankKnown(out)) { // We have no information about the shape of the output. c->set_output(0, out); return Status::OK(); } + ShapeHandle in = c->input(0); if (!c->RankKnown(in)) { // We have no information about the shape of the input, // nothing to do here. 
c->set_output(0, out); return Status::OK(); } - if (c->Rank(out) < c->Rank(in)) { - return errors::InvalidArgument("Cannot broadcast a tensor with shape ", - c->DebugString(in), " shape ", - c->DebugString(out)); - } - - int32 in_offset = c->Rank(out) - c->Rank(in); - for (int32 i = 0; i < c->Rank(out); ++i) { - DimensionHandle dim = c->Dim(out, i); - if (c->ValueKnown(dim)) { - // The first in_offset dimensions for input will be expanded with 1, - // so no check needed. - if (i >= in_offset) { - DimensionHandle in_dim = c->Dim(in, i - in_offset); - if (c->ValueKnown(in_dim) && c->Value(in_dim) != 0) { - if (c->Value(dim) % c->Value(in_dim) != 0) { - return errors::InvalidArgument( - "Cannot broadcast a tensor with shape ", c->DebugString(in), - " shape ", c->DebugString(out)); - } - } - } + int out_rank = c->Rank(out); + TF_RETURN_IF_ERROR(c->WithRankAtMost(in, out_rank, &in)); + int in_rank = c->Rank(in); + for (int i = 0; i < in_rank; ++i) { + auto in_dim = c->Dim(in, in_rank - i - 1); + if (c->Value(in_dim) > 1) { + // If the input dimension is greater than 1 then the output dimension + // must be equal to it, since we only broadcast "from left to right". 
+ auto out_dim = c->Dim(out, out_rank - i - 1); + TF_RETURN_IF_ERROR(c->Merge(in_dim, out_dim, &out_dim)); + TF_RETURN_IF_ERROR( + c->ReplaceDim(out, out_rank - i - 1, out_dim, &out)); } } - c->set_output(0, out); return Status::OK(); }); diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc index 1c29cd2491..92648ce188 100644 --- a/tensorflow/core/ops/array_ops_test.cc +++ b/tensorflow/core/ops/array_ops_test.cc @@ -509,6 +509,33 @@ TEST(ArrayOpsTest, BroadcastArgs_ShapeFn) { INFER_ERROR("Shape must be rank 1 but is rank 0", op, "?;[]"); } +TEST(ArrayOpsTest, BroadcastTo_ShapeFn) { + ShapeInferenceTestOp op("BroadcastTo"); + op.input_tensors.resize(2); + + INFER_OK(op, "?;[?]", "?"); + INFER_OK(op, "[];[1]", "[?]"); + INFER_OK(op, "[1];[1]", "[?]"); + INFER_OK(op, "[1];[2]", "[?,?]"); + INFER_OK(op, "[2,2];[3]", "[?,d0_0,d0_1]"); + + // Rank checks + INFER_ERROR("Shape must be rank 1 but is rank 2", op, "?;[?,?]"); + INFER_ERROR("Shape must be rank 1 but is rank 0", op, "[2];[]"); + INFER_ERROR("Shape must be at most rank 1 but is rank 2", op, "[2,2];[1]"); + + Tensor shape_t(DT_INT64, TensorShape{3}); + test::FillValues(&shape_t, {2, 10, 3}); + op.input_tensors[1] = &shape_t; + INFER_OK(op, "[1,?,1];[3]", "[2,10,3]"); + INFER_OK(op, "[1,1,1];[3]", "[2,10,3]"); + INFER_OK(op, "[10,1];[3]", "[2,d0_0,3]"); + INFER_ERROR("Dimensions must be equal, but are 3 and 2 for", op, + "[3,1,1];[3]"); + INFER_ERROR("Dimensions must be equal, but are 2 and 10 for", op, + "[2,2,1];[3]"); +} + TEST(ArrayOpsTest, BroadcastGradientArgs_ShapeFn) { ShapeInferenceTestOp op("BroadcastGradientArgs"); // Output is always two unknown vectors. 
diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc index 05379a7d69..1e6dbbfb2f 100644 --- a/tensorflow/core/ops/math_ops_test.cc +++ b/tensorflow/core/ops/math_ops_test.cc @@ -144,6 +144,7 @@ TEST(MathOpsTest, BroadcastBinaryOps_ShapeFn) { INFER_OK(op, "[1];[2]", "[d1_0]"); INFER_OK(op, "[2];[1]", "[d0_0]"); INFER_OK(op, "[2];[]", "[d0_0]"); + INFER_OK(op, "[2];[?]", "[d0_0]"); INFER_OK(op, "[0];[0]", "[d0_0|d1_0]"); INFER_OK(op, "[];[0]", "[d1_0]"); @@ -151,6 +152,9 @@ TEST(MathOpsTest, BroadcastBinaryOps_ShapeFn) { INFER_OK(op, "[0];[1]", "[d0_0]"); INFER_OK(op, "[0];[]", "[d0_0]"); + INFER_OK(op, "[2];[?,?]", "[d1_0,d0_0]"); + INFER_OK(op, "[2,2];[?,?,?]", "[d1_0,d0_0,d0_1]"); + // Multiple dimension cases (same test cases, switching x and y). INFER_OK(op, "[?,1,2,3,4,5];[3,1,?]", "[d0_0,d0_1,d0_2,d0_3|d1_0,d0_4,d0_5]"); -- GitLab From 445d52f1b3c22b04e25a44bd289b784eec282929 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 13 Feb 2019 09:48:30 -0800 Subject: [PATCH 041/351] [XLA:Python] Rename CompiledLocalComputation/CompiledXrtComputation to LocalExecutable/XrtExecutable. Rename LocalComputation* to Computation* (there is nothing Local about it any more). Split the two roles of the Python LocalComputation class into Computation (an uncompiled computation that can be compiled) and Executable (a compiled computation that can be executed). While in principle this is a significant API change, in practice JAX (the only known user) does not refer to any of these classes by name. 
PiperOrigin-RevId: 233772418 --- .../xla/python/local_computation_builder.cc | 265 ++++++++--------- .../xla/python/local_computation_builder.h | 71 +++-- .../xla/python/local_computation_builder.i | 272 +++++++++--------- tensorflow/compiler/xla/python/xla_client.py | 92 +++--- .../compiler/xla/python/xla_client_test.py | 16 +- 5 files changed, 347 insertions(+), 369 deletions(-) diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index 10d03e9f57..77bf51cb45 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -332,11 +332,11 @@ StatusOr XrtAllocationTuple::Release(int i) { int64 XrtAllocationTuple::size() const { return elements_.size(); } -CompiledLocalComputation::CompiledLocalComputation( - std::unique_ptr executable) +LocalExecutable::LocalExecutable( + std::unique_ptr executable) : executable_(std::move(executable)) {} -StatusOr CompiledLocalComputation::Execute( +StatusOr LocalExecutable::Execute( absl::Span argument_handles) { if (num_replicas() != 1) { return InvalidArgument( @@ -376,7 +376,7 @@ StatusOr CompiledLocalComputation::Execute( return new LocalShapedBuffer(std::move(result_buffer_status).ValueOrDie()); } -StatusOr CompiledLocalComputation::ExecutePerReplica( +StatusOr LocalExecutable::ExecutePerReplica( absl::Span> argument_handles) { TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient()); const int num_devices = client->device_count(); @@ -454,14 +454,13 @@ StatusOr CompiledLocalComputation::ExecutePerReplica( return new LocalShapedBufferTuple(std::move(wrapped_results)); } -CompiledXrtComputation::CompiledXrtComputation( - const ProgramShape& program_shape, int64 handle, - const string& session_target) +XrtExecutable::XrtExecutable(const ProgramShape& program_shape, int64 handle, + const string& session_target) : program_shape_(program_shape), handle_(handle), 
session_target_(session_target) {} -CompiledXrtComputation::~CompiledXrtComputation() { +XrtExecutable::~XrtExecutable() { tensorflow::Scope root = tensorflow::Scope::NewRootScope(); auto computation_handle = tensorflow::ops::Placeholder(root, tensorflow::DT_INT64); @@ -483,7 +482,7 @@ CompiledXrtComputation::~CompiledXrtComputation() { } } -StatusOr CompiledXrtComputation::Execute( +StatusOr XrtExecutable::Execute( absl::Span argument_handles) { const int num_expected_arguments = program_shape().parameters().size(); @@ -522,16 +521,16 @@ StatusOr CompiledXrtComputation::Execute( return new XrtAllocation(output, program_shape().result(), session_target_); } -const ProgramShape& CompiledXrtComputation::program_shape() const { +const ProgramShape& XrtExecutable::program_shape() const { return program_shape_; } -int64 CompiledXrtComputation::handle() const { return handle_; } +int64 XrtExecutable::handle() const { return handle_; } -LocalComputation::LocalComputation(XlaComputation computation) +Computation::Computation(XlaComputation computation) : computation_(std::move(computation)) {} -StatusOr LocalComputation::Compile( +StatusOr Computation::Compile( const std::vector& argument_shapes, const ExecutableBuildOptions* build_options) { std::vector argument_shape_pointers; @@ -548,10 +547,10 @@ StatusOr LocalComputation::Compile( TF_ASSIGN_OR_RETURN( auto local_executable, client->Compile(computation_, argument_shape_pointers, options)); - return new CompiledLocalComputation(std::move(local_executable)); + return new LocalExecutable(std::move(local_executable)); } -StatusOr LocalComputation::CompileForXrt( +StatusOr Computation::CompileForXrt( const std::vector& argument_shapes, const string& session_target) { tensorflow::Scope root = tensorflow::Scope::NewRootScope(); auto program = tensorflow::ops::Placeholder(root, tensorflow::DT_STRING); @@ -579,14 +578,12 @@ StatusOr LocalComputation::CompileForXrt( TF_ASSIGN_OR_RETURN(ProgramShape program_shape, 
computation().GetProgramShape()); int64 handle = outputs[0].scalar()(); - return new CompiledXrtComputation(program_shape, handle, session_target); + return new XrtExecutable(program_shape, handle, session_target); } -const XlaComputation& LocalComputation::computation() const { - return computation_; -} +const XlaComputation& Computation::computation() const { return computation_; } -string LocalComputation::GetSerializedProto() const { +string Computation::GetSerializedProto() const { string result; if (!computation_.proto().SerializeToString(&result)) { LOG(ERROR) << "Failed to serialize the HloModuleProto."; @@ -595,11 +592,11 @@ string LocalComputation::GetSerializedProto() const { return result; } -StatusOr LocalComputation::GetProgramShape() const { +StatusOr Computation::GetProgramShape() const { return computation_.GetProgramShape(); } -StatusOr LocalComputation::GetReturnValueShape() const { +StatusOr Computation::GetReturnValueShape() const { TF_ASSIGN_OR_RETURN(ProgramShape shape, computation_.GetProgramShape()); return std::move(*shape.mutable_result()); } @@ -608,93 +605,90 @@ LocalOp::LocalOp(const XlaOp& op) : op_(op) {} const XlaOp& LocalOp::op() const { return op_; } -LocalComputationBuilder::LocalComputationBuilder(const string& computation_name) +ComputationBuilder::ComputationBuilder(const string& computation_name) : builder_(computation_name) {} -void LocalComputationBuilder::SetOpMetadata(const OpMetadata& metadata) { +void ComputationBuilder::SetOpMetadata(const OpMetadata& metadata) { builder_.SetOpMetadata(metadata); } -void LocalComputationBuilder::ClearOpMetadata() { builder_.ClearOpMetadata(); } +void ComputationBuilder::ClearOpMetadata() { builder_.ClearOpMetadata(); } -StatusOr LocalComputationBuilder::Build() { +StatusOr ComputationBuilder::Build() { TF_ASSIGN_OR_RETURN(XlaComputation computation, builder_.Build()); - return new LocalComputation(std::move(computation)); + return new Computation(std::move(computation)); } -LocalOp 
LocalComputationBuilder::Parameter(int64 parameter_number, - const Shape& shape, - const string& name) { +LocalOp ComputationBuilder::Parameter(int64 parameter_number, + const Shape& shape, const string& name) { return xla::Parameter(&builder_, parameter_number, shape, name); } -StatusOr LocalComputationBuilder::BuildWithRoot( - const LocalOp& root) { +StatusOr ComputationBuilder::BuildWithRoot(const LocalOp& root) { TF_ASSIGN_OR_RETURN(XlaComputation computation, builder_.Build(root.op())); - return new LocalComputation(std::move(computation)); + return new Computation(std::move(computation)); } -StatusOr LocalComputationBuilder::GetShape(const LocalOp& operand) { +StatusOr ComputationBuilder::GetShape(const LocalOp& operand) { return builder_.GetShape(operand.op()); } -StatusOr LocalComputationBuilder::GetReturnValueShape() { +StatusOr ComputationBuilder::GetReturnValueShape() { TF_ASSIGN_OR_RETURN(ProgramShape program_shape, builder_.GetProgramShape()); return program_shape.result(); } -LocalOp LocalComputationBuilder::Infeed(const Shape& shape) { +LocalOp ComputationBuilder::Infeed(const Shape& shape) { return xla::Infeed(&builder_, shape); } -void LocalComputationBuilder::Outfeed(const LocalOp& operand, - const Shape& shape, - const string& outfeed_config) { +void ComputationBuilder::Outfeed(const LocalOp& operand, const Shape& shape, + const string& outfeed_config) { xla::Outfeed(operand.op(), shape, outfeed_config); } -LocalOp LocalComputationBuilder::ConstantLiteral(const Literal& literal) { +LocalOp ComputationBuilder::ConstantLiteral(const Literal& literal) { return xla::ConstantLiteral(&builder_, literal); } -LocalOp LocalComputationBuilder::Iota(PrimitiveType element_type, int64 size) { +LocalOp ComputationBuilder::Iota(PrimitiveType element_type, int64 size) { return xla::Iota(&builder_, element_type, size); } -LocalOp LocalComputationBuilder::BroadcastedIota(const Shape& shape, - int64 dimension) { +LocalOp ComputationBuilder::BroadcastedIota(const 
Shape& shape, + int64 dimension) { return xla::Iota(&builder_, shape, dimension); } -LocalOp LocalComputationBuilder::Broadcast( - const LocalOp& operand, absl::Span broadcast_sizes) { +LocalOp ComputationBuilder::Broadcast(const LocalOp& operand, + absl::Span broadcast_sizes) { return xla::Broadcast(operand.op(), broadcast_sizes); } -LocalOp LocalComputationBuilder::BroadcastInDim( +LocalOp ComputationBuilder::BroadcastInDim( const LocalOp& operand, absl::Span out_dim_sizes, absl::Span broadcast_dimensions) { return xla::BroadcastInDim(operand.op(), out_dim_sizes, broadcast_dimensions); } -LocalOp LocalComputationBuilder::Pad(const LocalOp& operand, - const LocalOp& padding_value, - const PaddingConfig& padding_config) { +LocalOp ComputationBuilder::Pad(const LocalOp& operand, + const LocalOp& padding_value, + const PaddingConfig& padding_config) { return xla::Pad(operand.op(), padding_value.op(), padding_config); } -LocalOp LocalComputationBuilder::Reshape(const LocalOp& operand, - absl::Span dimensions, - absl::Span new_sizes) { +LocalOp ComputationBuilder::Reshape(const LocalOp& operand, + absl::Span dimensions, + absl::Span new_sizes) { return xla::Reshape(operand.op(), dimensions, new_sizes); } -LocalOp LocalComputationBuilder::Collapse(const LocalOp& operand, - absl::Span dimensions) { +LocalOp ComputationBuilder::Collapse(const LocalOp& operand, + absl::Span dimensions) { return xla::Collapse(operand.op(), dimensions); } -LocalOp LocalComputationBuilder::AllToAll( +LocalOp ComputationBuilder::AllToAll( const LocalOp& operand, int64 split_dimension, int64 concat_dimension, int64 split_count, absl::Span replica_groups) { std::vector rg(replica_groups.size()); @@ -705,39 +699,38 @@ LocalOp LocalComputationBuilder::AllToAll( split_count, rg); } -LocalOp LocalComputationBuilder::CrossReplicaSum( +LocalOp ComputationBuilder::CrossReplicaSum( const LocalOp& operand, absl::Span replica_groups) { return xla::CrossReplicaSum(operand.op(), replica_groups); } -LocalOp 
LocalComputationBuilder::Slice(const LocalOp& operand, - absl::Span start_indices, - absl::Span limit_indices, - absl::Span strides) { +LocalOp ComputationBuilder::Slice(const LocalOp& operand, + absl::Span start_indices, + absl::Span limit_indices, + absl::Span strides) { return xla::Slice(operand.op(), start_indices, limit_indices, strides); } -LocalOp LocalComputationBuilder::SliceInDim(const LocalOp& operand, - int64 start_index, - int64 limit_index, int64 stride, - int64 dimno) { +LocalOp ComputationBuilder::SliceInDim(const LocalOp& operand, + int64 start_index, int64 limit_index, + int64 stride, int64 dimno) { return xla::SliceInDim(operand.op(), start_index, limit_index, stride, dimno); } -LocalOp LocalComputationBuilder::DynamicSlice( - const LocalOp& operand, const LocalOp& start_indices, - absl::Span slice_sizes) { +LocalOp ComputationBuilder::DynamicSlice(const LocalOp& operand, + const LocalOp& start_indices, + absl::Span slice_sizes) { return xla::DynamicSlice(operand.op(), start_indices.op(), slice_sizes); } -LocalOp LocalComputationBuilder::DynamicUpdateSlice( - const LocalOp& operand, const LocalOp& update, - const LocalOp& start_indices) { +LocalOp ComputationBuilder::DynamicUpdateSlice(const LocalOp& operand, + const LocalOp& update, + const LocalOp& start_indices) { return xla::DynamicUpdateSlice(operand.op(), update.op(), start_indices.op()); } -LocalOp LocalComputationBuilder::ConcatInDim(absl::Span operands, - int64 dimension) { +LocalOp ComputationBuilder::ConcatInDim(absl::Span operands, + int64 dimension) { std::vector xla_ops; xla_ops.reserve(operands.size()); for (const auto& op : operands) { @@ -746,18 +739,18 @@ LocalOp LocalComputationBuilder::ConcatInDim(absl::Span operands, return xla::ConcatInDim(&builder_, xla_ops, dimension); } -LocalOp LocalComputationBuilder::SelectAndScatterWithGeneralPadding( - const LocalOp& operand, const LocalComputation& select, +LocalOp ComputationBuilder::SelectAndScatterWithGeneralPadding( + const 
LocalOp& operand, const Computation& select, absl::Span window_dimensions, absl::Span window_strides, absl::Span> padding, const LocalOp& source, - const LocalOp& init_value, const LocalComputation& scatter) { + const LocalOp& init_value, const Computation& scatter) { return xla::SelectAndScatterWithGeneralPadding( operand.op(), select.computation(), window_dimensions, window_strides, padding, source.op(), init_value.op(), scatter.computation()); } -LocalOp LocalComputationBuilder::Tuple(absl::Span elements) { +LocalOp ComputationBuilder::Tuple(absl::Span elements) { std::vector xla_ops; xla_ops.reserve(elements.size()); for (const auto& op : elements) { @@ -767,22 +760,22 @@ LocalOp LocalComputationBuilder::Tuple(absl::Span elements) { return xla::Tuple(&builder_, xla_ops); } -LocalOp LocalComputationBuilder::GetTupleElement(const LocalOp& tuple_data, - int64 index) { +LocalOp ComputationBuilder::GetTupleElement(const LocalOp& tuple_data, + int64 index) { return xla::GetTupleElement(tuple_data.op(), index); } -LocalOp LocalComputationBuilder::Dot(const LocalOp& lhs, const LocalOp& rhs) { +LocalOp ComputationBuilder::Dot(const LocalOp& lhs, const LocalOp& rhs) { return xla::Dot(lhs.op(), rhs.op()); } -LocalOp LocalComputationBuilder::DotGeneral( +LocalOp ComputationBuilder::DotGeneral( const LocalOp& lhs, const LocalOp& rhs, const DotDimensionNumbers& dimension_numbers) { return xla::DotGeneral(lhs.op(), rhs.op(), dimension_numbers); } -LocalOp LocalComputationBuilder::ConvGeneralDilated( +LocalOp ComputationBuilder::ConvGeneralDilated( const LocalOp& lhs, const LocalOp& rhs, absl::Span window_strides, absl::Span> padding, @@ -794,18 +787,18 @@ LocalOp LocalComputationBuilder::ConvGeneralDilated( feature_group_count); } -LocalOp LocalComputationBuilder::ConvertElementType( - const LocalOp& operand, PrimitiveType new_element_type) { +LocalOp ComputationBuilder::ConvertElementType(const LocalOp& operand, + PrimitiveType new_element_type) { return 
xla::ConvertElementType(operand.op(), new_element_type); } -LocalOp LocalComputationBuilder::BitcastConvertType( - const LocalOp& operand, PrimitiveType new_element_type) { +LocalOp ComputationBuilder::BitcastConvertType(const LocalOp& operand, + PrimitiveType new_element_type) { return xla::BitcastConvertType(operand.op(), new_element_type); } -LocalOp LocalComputationBuilder::Call(const LocalComputation& local_computation, - absl::Span operands) { +LocalOp ComputationBuilder::Call(const Computation& local_computation, + absl::Span operands) { std::vector xla_ops; xla_ops.reserve(operands.size()); for (const auto& op : operands) { @@ -814,7 +807,7 @@ LocalOp LocalComputationBuilder::Call(const LocalComputation& local_computation, return xla::Call(&builder_, local_computation.computation(), xla_ops); } -LocalOp LocalComputationBuilder::CustomCall( +LocalOp ComputationBuilder::CustomCall( const string& call_target_name, absl::Span operands, const Shape& shape_with_layout, const std::vector& operand_shapes_with_layout, @@ -829,19 +822,19 @@ LocalOp LocalComputationBuilder::CustomCall( operand_shapes_with_layout, opaque); } -LocalOp LocalComputationBuilder::Transpose( - const LocalOp& operand, absl::Span permutation) { +LocalOp ComputationBuilder::Transpose(const LocalOp& operand, + absl::Span permutation) { return xla::Transpose(operand.op(), permutation); } -LocalOp LocalComputationBuilder::Rev(const LocalOp& operand, - absl::Span dimensions) { +LocalOp ComputationBuilder::Rev(const LocalOp& operand, + absl::Span dimensions) { return xla::Rev(operand.op(), dimensions); } -LocalOp LocalComputationBuilder::Map(absl::Span operands, - const LocalComputation& local_computation, - absl::Span dimensions) { +LocalOp ComputationBuilder::Map(absl::Span operands, + const Computation& local_computation, + absl::Span dimensions) { std::vector xla_ops; xla_ops.reserve(operands.size()); for (const auto& op : operands) { @@ -852,17 +845,17 @@ LocalOp 
LocalComputationBuilder::Map(absl::Span operands, dimensions); } -LocalOp LocalComputationBuilder::Reduce( +LocalOp ComputationBuilder::Reduce( const LocalOp& operand, const LocalOp& init_value, - const LocalComputation& local_computation, + const Computation& local_computation, absl::Span dimensions_to_reduce) { return xla::Reduce(operand.op(), init_value.op(), local_computation.computation(), dimensions_to_reduce); } -LocalOp LocalComputationBuilder::ReduceWindowWithGeneralPadding( +LocalOp ComputationBuilder::ReduceWindowWithGeneralPadding( const LocalOp& operand, const LocalOp& init_value, - const LocalComputation& local_computation, + const Computation& local_computation, absl::Span window_dimensions, absl::Span window_strides, absl::Span base_dilations, @@ -874,51 +867,50 @@ LocalOp LocalComputationBuilder::ReduceWindowWithGeneralPadding( padding); } -LocalOp LocalComputationBuilder::RngNormal(const LocalOp& mu, - const LocalOp& sigma, - const Shape& shape) { +LocalOp ComputationBuilder::RngNormal(const LocalOp& mu, const LocalOp& sigma, + const Shape& shape) { return xla::RngNormal(mu.op(), sigma.op(), shape); } -LocalOp LocalComputationBuilder::RngUniform(const LocalOp& a, const LocalOp& b, - const Shape& shape) { +LocalOp ComputationBuilder::RngUniform(const LocalOp& a, const LocalOp& b, + const Shape& shape) { return xla::RngUniform(a.op(), b.op(), shape); } -LocalOp LocalComputationBuilder::While(const LocalComputation& condition, - const LocalComputation& body, - const LocalOp& init) { +LocalOp ComputationBuilder::While(const Computation& condition, + const Computation& body, + const LocalOp& init) { return xla::While(condition.computation(), body.computation(), init.op()); } -LocalOp LocalComputationBuilder::Conditional( - const LocalOp& predicate, const LocalOp& true_operand, - const LocalComputation& true_computation, const LocalOp& false_operand, - const LocalComputation& false_computation) { +LocalOp ComputationBuilder::Conditional(const LocalOp& 
predicate, + const LocalOp& true_operand, + const Computation& true_computation, + const LocalOp& false_operand, + const Computation& false_computation) { return xla::Conditional(predicate.op(), true_operand.op(), true_computation.computation(), false_operand.op(), false_computation.computation()); } -StatusOr LocalComputationBuilder::IsConstant(const LocalOp& operand) { +StatusOr ComputationBuilder::IsConstant(const LocalOp& operand) { return builder_.IsConstant(operand.op()); } -LocalOp LocalComputationBuilder::Sort(const LocalOp& operand, int64 dimension) { +LocalOp ComputationBuilder::Sort(const LocalOp& operand, int64 dimension) { return xla::Sort(operand.op(), {}, dimension); } -LocalOp LocalComputationBuilder::SortKeyVal(const LocalOp& keys, - const LocalOp& values, - int64 dimension) { +LocalOp ComputationBuilder::SortKeyVal(const LocalOp& keys, + const LocalOp& values, int64 dimension) { return xla::Sort(keys.op(), {values.op()}, dimension); } -LocalOp LocalComputationBuilder::Cholesky(const LocalOp& a) { +LocalOp ComputationBuilder::Cholesky(const LocalOp& a) { return xla::Cholesky(a.op()); } -LocalOp LocalComputationBuilder::QR(const LocalOp& a, bool full_matrices) { +LocalOp ComputationBuilder::QR(const LocalOp& a, bool full_matrices) { XlaBuilder* builder = a.op().builder(); return builder->ReportErrorOrReturn([&]() -> StatusOr { TF_ASSIGN_OR_RETURN(auto qr, xla::QRDecomposition(a.op(), full_matrices)); @@ -926,17 +918,16 @@ LocalOp LocalComputationBuilder::QR(const LocalOp& a, bool full_matrices) { }); } -LocalOp LocalComputationBuilder::TriangularSolve(const LocalOp& a, - const LocalOp& b, - bool left_side, bool lower, - bool unit_diagonal, - int transpose_a) { +LocalOp ComputationBuilder::TriangularSolve(const LocalOp& a, const LocalOp& b, + bool left_side, bool lower, + bool unit_diagonal, + int transpose_a) { return xla::TriangularSolve( a.op(), b.op(), left_side, lower, unit_diagonal, xla::TriangularSolveOptions::Transpose(transpose_a)); } 
-LocalOp LocalComputationBuilder::Gather( +LocalOp ComputationBuilder::Gather( const LocalOp& input, const LocalOp& start_indices, const GatherDimensionNumbers& dimension_numbers, absl::Span slice_sizes) { @@ -944,24 +935,24 @@ LocalOp LocalComputationBuilder::Gather( slice_sizes); } -LocalOp LocalComputationBuilder::Scatter( +LocalOp ComputationBuilder::Scatter( const LocalOp& input, const LocalOp& scatter_indices, - const LocalOp& updates, const LocalComputation& update_computation, + const LocalOp& updates, const Computation& update_computation, const ScatterDimensionNumbers& dimension_numbers) { return xla::Scatter(input.op(), scatter_indices.op(), updates.op(), update_computation.computation(), dimension_numbers); } -StatusOr LocalComputationBuilder::BuildConstantSubGraph( +StatusOr ComputationBuilder::BuildConstantSubGraph( const LocalOp& operand) { TF_ASSIGN_OR_RETURN(XlaComputation computation, builder_.BuildConstantSubGraph(operand.op())); - return new LocalComputation(std::move(computation)); + return new Computation(std::move(computation)); } -#define _FORWARD(method_name, return_sig, args_sig, args) \ - return_sig LocalComputationBuilder::method_name args_sig { \ - return xla::method_name args; \ +#define _FORWARD(method_name, return_sig, args_sig, args) \ + return_sig ComputationBuilder::method_name args_sig { \ + return xla::method_name args; \ } #define _FORWARD_UNOP(method_name) \ @@ -1050,17 +1041,11 @@ void DeleteLocalShapedBuffer(LocalShapedBuffer* local_shaped_buffer) { void DeleteXrtAllocation(XrtAllocation* allocation) { delete allocation; } -void DeleteCompiledLocalComputation(CompiledLocalComputation* computation) { - delete computation; -} +void DeleteLocalExecutable(LocalExecutable* computation) { delete computation; } -void DeleteCompiledXrtComputation(CompiledXrtComputation* computation) { - delete computation; -} +void DeleteXrtExecutable(XrtExecutable* computation) { delete computation; } -void DeleteLocalComputation(LocalComputation* 
computation) { - delete computation; -} +void DeleteComputation(Computation* computation) { delete computation; } StatusOr DestructureLocalShapedBufferTuple( LocalShapedBuffer* local_shaped_buffer) { diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h index f62b2b6c72..c9e93fb5aa 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.h +++ b/tensorflow/compiler/xla/python/local_computation_builder.h @@ -176,9 +176,9 @@ StatusOr DestructureXrtAllocationTuple( // Represents a compiled computation that can be executed given handles to // device-allocated literals. Specifically, wraps an XLA LocalExecutable. -class CompiledLocalComputation { +class LocalExecutable { public: - CompiledLocalComputation(std::unique_ptr executable); + LocalExecutable(std::unique_ptr executable); int num_replicas() const { return executable_->build_options().num_replicas(); @@ -194,18 +194,18 @@ class CompiledLocalComputation { absl::Span > argument_handles); private: - std::unique_ptr executable_; + std::unique_ptr executable_; }; // Represents a compiled computation that can be executed given handles to // device-allocated literals. Specifically, wraps an XRT computation handle. -class CompiledXrtComputation { +class XrtExecutable { public: // Accepts a `session_target` argument, used in constructing the // `tensorflow::ClientSession` instance in which the execution graph is run. - CompiledXrtComputation(const ProgramShape& program_shape, int64 handle, - const string& session_target); - ~CompiledXrtComputation(); + XrtExecutable(const ProgramShape& program_shape, int64 handle, + const string& session_target); + ~XrtExecutable(); StatusOr Execute( absl::Span argument_handles); @@ -219,21 +219,21 @@ class CompiledXrtComputation { const string session_target_; }; -// Wraps a XlaComputation produced by a LocalComputationBuilder. The +// Wraps a XlaComputation produced by a ComputationBuilder. 
The // Compile method compiles the computation to a (local) executable via // the client library's local client. This class is intended to be // made available to Python via SWIG. -class LocalComputation { +class Computation { public: - LocalComputation(XlaComputation computation); + Computation(XlaComputation computation); - StatusOr Compile( + StatusOr Compile( const std::vector& argument_shapes, const ExecutableBuildOptions* build_options); // Accepts a `session_target` argument, used in constructing the // `tensorflow::ClientSession` instance in which the compilation graph is run. - StatusOr CompileForXrt( + StatusOr CompileForXrt( const std::vector& argument_shapes, const string& session_target); const XlaComputation& computation() const; @@ -253,7 +253,7 @@ class LocalComputation { XlaComputation computation_; }; -// Wraps a XlaOp produced by a LocalComputationBuilder. This class is intended +// Wraps a XlaOp produced by a ComputationBuilder. This class is intended // to be made available to Python via SWIG. class LocalOp { public: @@ -270,20 +270,20 @@ class LocalOp { // Python. // - Set up the underlying builder to use the client library's // LocalClient. -// - Wrap Computations in LocalComputations for Python access. -// - Correspondingly unwrap incoming LocalComputations. -class LocalComputationBuilder { +// - Wrap Computations in Computations for Python access. +// - Correspondingly unwrap incoming Computations. +class ComputationBuilder { public: - LocalComputationBuilder(const string& computation_name); + ComputationBuilder(const string& computation_name); void SetOpMetadata(const OpMetadata& metadata); void ClearOpMetadata(); - // Returns an owned LocalComputation to the caller on success. - StatusOr Build(); + // Returns an owned Computation to the caller on success. + StatusOr Build(); - // Returns an owned LocalComputation to the caller on success with given root. 
- StatusOr BuildWithRoot(const LocalOp& root); + // Returns an owned Computation to the caller on success with given root. + StatusOr BuildWithRoot(const LocalOp& root); LocalOp Parameter(int64 parameter_number, const Shape& shape, const string& name); @@ -342,11 +342,11 @@ class LocalComputationBuilder { LocalOp ConcatInDim(absl::Span operands, int64 dimension); LocalOp SelectAndScatterWithGeneralPadding( - const LocalOp& operand, const LocalComputation& select, + const LocalOp& operand, const Computation& select, absl::Span window_dimensions, absl::Span window_strides, absl::Span > padding, const LocalOp& source, - const LocalOp& init_value, const LocalComputation& scatter); + const LocalOp& init_value, const Computation& scatter); LocalOp Tuple(absl::Span elements); @@ -372,7 +372,7 @@ class LocalComputationBuilder { LocalOp BitcastConvertType(const LocalOp& operand, PrimitiveType new_element_type); - LocalOp Call(const LocalComputation& local_computation, + LocalOp Call(const Computation& local_computation, absl::Span operands); LocalOp CustomCall(const string& call_target_name, @@ -387,16 +387,16 @@ class LocalComputationBuilder { LocalOp Rev(const LocalOp& operand, absl::Span dimensions); LocalOp Map(absl::Span operands, - const LocalComputation& local_computation, + const Computation& local_computation, absl::Span dimensions); LocalOp Reduce(const LocalOp& operand, const LocalOp& init_value, - const LocalComputation& local_computation, + const Computation& local_computation, absl::Span dimensions_to_reduce); LocalOp ReduceWindowWithGeneralPadding( const LocalOp& operand, const LocalOp& init_value, - const LocalComputation& local_computation, + const Computation& local_computation, absl::Span window_dimensions, absl::Span window_strides, absl::Span base_dilations, @@ -408,13 +408,13 @@ class LocalComputationBuilder { LocalOp RngUniform(const LocalOp& a, const LocalOp& b, const Shape& shape); - LocalOp While(const LocalComputation& condition, const 
LocalComputation& body, + LocalOp While(const Computation& condition, const Computation& body, const LocalOp& init); LocalOp Conditional(const LocalOp& predicate, const LocalOp& true_operand, - const LocalComputation& true_computation, + const Computation& true_computation, const LocalOp& false_operand, - const LocalComputation& false_computation); + const Computation& false_computation); StatusOr IsConstant(const LocalOp& operand); @@ -438,11 +438,10 @@ class LocalComputationBuilder { absl::Span slice_sizes); LocalOp Scatter(const LocalOp& input, const LocalOp& scatter_indices, - const LocalOp& updates, - const LocalComputation& update_computation, + const LocalOp& updates, const Computation& update_computation, const ScatterDimensionNumbers& dimension_numbers); - StatusOr BuildConstantSubGraph(const LocalOp& operand); + StatusOr BuildConstantSubGraph(const LocalOp& operand); #define _FORWARD(method_name, return_sig, args_sig) \ return_sig method_name args_sig; @@ -532,9 +531,9 @@ class LocalComputationBuilder { // Functions for freeing resources from the Python side. void DeleteLocalShapedBuffer(LocalShapedBuffer* local_shaped_buffer); void DeleteXrtAllocation(XrtAllocation* allocation); -void DeleteCompiledLocalComputation(CompiledLocalComputation* computation); -void DeleteCompiledXrtComputation(CompiledXrtComputation* computation); -void DeleteLocalComputation(LocalComputation* computation); +void DeleteLocalExecutable(LocalExecutable* computation); +void DeleteXrtExecutable(XrtExecutable* computation); +void DeleteComputation(Computation* computation); } // namespace swig } // namespace xla diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index 688fcf9f4d..e47227da61 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -98,7 +98,7 @@ limitations under the License. 
// wrapped in a Python class (xla_client.Shape) so as not to expose // the raw pair externally. // -// Other SWIG object wrappers (e.g. of LocalComputation) are further +// Other SWIG object wrappers (e.g. of Computation) are further // wrapped by xla_client in order to set up a custom destructor that // triggers memory deallocation on the C++ side. @@ -288,12 +288,12 @@ tensorflow::ImportNumpy(); // Computation and buffer/allocation types -%typemap(out) StatusOr { +%typemap(out) StatusOr { if ($1.ok()) { auto* value = $1.ValueOrDie(); { auto* $1 = value; - $typemap(out, xla::swig::CompiledLocalComputation*) + $typemap(out, xla::swig::LocalExecutable*) } } else { PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); @@ -301,12 +301,12 @@ tensorflow::ImportNumpy(); } } -%typemap(out) StatusOr { +%typemap(out) StatusOr { if ($1.ok()) { auto* value = $1.ValueOrDie(); { auto* $1 = value; - $typemap(out, xla::swig::CompiledXrtComputation*) + $typemap(out, xla::swig::XrtExecutable*) } } else { PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); @@ -366,12 +366,12 @@ tensorflow::ImportNumpy(); } } -%typemap(out) StatusOr { +%typemap(out) StatusOr { if ($1.ok()) { auto* value = $1.ValueOrDie(); { auto* $1 = value; - $typemap(out, xla::swig::LocalComputation*) + $typemap(out, xla::swig::Computation*) } } else { PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); @@ -1000,140 +1000,140 @@ tensorflow::ImportNumpy(); %unignore xla::swig::XrtAllocationTuple; %unignore xla::swig::XrtAllocationTuple::Release; %unignore xla::swig::XrtAllocationTuple::size; -%unignore xla::swig::CompiledLocalComputation; -%unignore xla::swig::CompiledLocalComputation::Execute; -%unignore xla::swig::CompiledLocalComputation::ExecutePerReplica; -%unignore xla::swig::CompiledXrtComputation; -%unignore xla::swig::CompiledXrtComputation::Execute; -%unignore xla::swig::LocalComputation; -%unignore xla::swig::LocalComputation::Compile; -%unignore 
xla::swig::LocalComputation::CompileForXrt; -%unignore xla::swig::LocalComputation::GetProgramShape; -%unignore xla::swig::LocalComputation::GetReturnValueShape; -%unignore xla::swig::LocalComputation::GetSerializedProto; +%unignore xla::swig::LocalExecutable; +%unignore xla::swig::LocalExecutable::Execute; +%unignore xla::swig::LocalExecutable::ExecutePerReplica; +%unignore xla::swig::XrtExecutable; +%unignore xla::swig::XrtExecutable::Execute; +%unignore xla::swig::Computation; +%unignore xla::swig::Computation::Compile; +%unignore xla::swig::Computation::CompileForXrt; +%unignore xla::swig::Computation::GetProgramShape; +%unignore xla::swig::Computation::GetReturnValueShape; +%unignore xla::swig::Computation::GetSerializedProto; %unignore xla::swig::LocalOp; -%unignore xla::swig::LocalComputationBuilder; -%unignore xla::swig::LocalComputationBuilder::LocalComputationBuilder; -%unignore xla::swig::LocalComputationBuilder::Build; -%unignore xla::swig::LocalComputationBuilder::BuildWithRoot; -%unignore xla::swig::LocalComputationBuilder::SetOpMetadata; -%unignore xla::swig::LocalComputationBuilder::ClearOpMetadata; -%unignore xla::swig::LocalComputationBuilder::Parameter; -%unignore xla::swig::LocalComputationBuilder::GetShape; -%unignore xla::swig::LocalComputationBuilder::GetReturnValueShape; -%unignore xla::swig::LocalComputationBuilder::Infeed; -%unignore xla::swig::LocalComputationBuilder::Outfeed; -%unignore xla::swig::LocalComputationBuilder::ConstantLiteral; -%unignore xla::swig::LocalComputationBuilder::ConstantR0; -%unignore xla::swig::LocalComputationBuilder::Iota; -%unignore xla::swig::LocalComputationBuilder::BroadcastedIota; -%unignore xla::swig::LocalComputationBuilder::Broadcast; -%unignore xla::swig::LocalComputationBuilder::BroadcastInDim; -%unignore xla::swig::LocalComputationBuilder::Pad; -%unignore xla::swig::LocalComputationBuilder::Reshape; -%unignore xla::swig::LocalComputationBuilder::Collapse; -%unignore 
xla::swig::LocalComputationBuilder::AllToAll; -%unignore xla::swig::LocalComputationBuilder::CrossReplicaSum; -%unignore xla::swig::LocalComputationBuilder::Slice; -%unignore xla::swig::LocalComputationBuilder::SliceInDim; -%unignore xla::swig::LocalComputationBuilder::DynamicSlice; -%unignore xla::swig::LocalComputationBuilder::DynamicUpdateSlice; -%unignore xla::swig::LocalComputationBuilder::ConcatInDim; -%unignore xla::swig::LocalComputationBuilder::SelectAndScatterWithGeneralPadding; -%unignore xla::swig::LocalComputationBuilder::Select; -%unignore xla::swig::LocalComputationBuilder::Tuple; -%unignore xla::swig::LocalComputationBuilder::GetTupleElement; -%unignore xla::swig::LocalComputationBuilder::ConvertElementType; -%unignore xla::swig::LocalComputationBuilder::BitcastConvertType; -%unignore xla::swig::LocalComputationBuilder::Call; -%unignore xla::swig::LocalComputationBuilder::Transpose; -%unignore xla::swig::LocalComputationBuilder::Rev; -%unignore xla::swig::LocalComputationBuilder::Clamp; -%unignore xla::swig::LocalComputationBuilder::Map; -%unignore xla::swig::LocalComputationBuilder::Reduce; -%unignore xla::swig::LocalComputationBuilder::ReduceWindowWithGeneralPadding; -%unignore xla::swig::LocalComputationBuilder::RngNormal; -%unignore xla::swig::LocalComputationBuilder::RngUniform; -%unignore xla::swig::LocalComputationBuilder::RngBernoulli; -%unignore xla::swig::LocalComputationBuilder::While; -%unignore xla::swig::LocalComputationBuilder::Conditional; -%unignore xla::swig::LocalComputationBuilder::IsConstant; -%unignore xla::swig::LocalComputationBuilder::Eq; -%unignore xla::swig::LocalComputationBuilder::Ne; -%unignore xla::swig::LocalComputationBuilder::Ge; -%unignore xla::swig::LocalComputationBuilder::Gt; -%unignore xla::swig::LocalComputationBuilder::Lt; -%unignore xla::swig::LocalComputationBuilder::Le; -%unignore xla::swig::LocalComputationBuilder::Dot; -%unignore xla::swig::LocalComputationBuilder::DotGeneral; -%unignore 
xla::swig::LocalComputationBuilder::ConvGeneralDilated; -%unignore xla::swig::LocalComputationBuilder::Add; -%unignore xla::swig::LocalComputationBuilder::Sub; -%unignore xla::swig::LocalComputationBuilder::Mul; -%unignore xla::swig::LocalComputationBuilder::Div; -%unignore xla::swig::LocalComputationBuilder::Rem; -%unignore xla::swig::LocalComputationBuilder::Max; -%unignore xla::swig::LocalComputationBuilder::Min; -%unignore xla::swig::LocalComputationBuilder::And; -%unignore xla::swig::LocalComputationBuilder::Or; -%unignore xla::swig::LocalComputationBuilder::Xor; -%unignore xla::swig::LocalComputationBuilder::ShiftLeft; -%unignore xla::swig::LocalComputationBuilder::ShiftRightArithmetic; -%unignore xla::swig::LocalComputationBuilder::ShiftRightLogical; -%unignore xla::swig::LocalComputationBuilder::Not; -%unignore xla::swig::LocalComputationBuilder::Abs; -%unignore xla::swig::LocalComputationBuilder::Exp; -%unignore xla::swig::LocalComputationBuilder::Expm1; -%unignore xla::swig::LocalComputationBuilder::Floor; -%unignore xla::swig::LocalComputationBuilder::Ceil; -%unignore xla::swig::LocalComputationBuilder::Round; -%unignore xla::swig::LocalComputationBuilder::Log; -%unignore xla::swig::LocalComputationBuilder::Log1p; -%unignore xla::swig::LocalComputationBuilder::Sign; -%unignore xla::swig::LocalComputationBuilder::Cos; -%unignore xla::swig::LocalComputationBuilder::Sin; -%unignore xla::swig::LocalComputationBuilder::Tanh; -%unignore xla::swig::LocalComputationBuilder::Atan2; -%unignore xla::swig::LocalComputationBuilder::IsFinite; -%unignore xla::swig::LocalComputationBuilder::Pow; -%unignore xla::swig::LocalComputationBuilder::Neg; -%unignore xla::swig::LocalComputationBuilder::Sort; -%unignore xla::swig::LocalComputationBuilder::SortKeyVal; -%unignore xla::swig::LocalComputationBuilder::Sqrt; -%unignore xla::swig::LocalComputationBuilder::Rsqrt; -%unignore xla::swig::LocalComputationBuilder::Square; -%unignore 
xla::swig::LocalComputationBuilder::Reciprocal; -%unignore xla::swig::LocalComputationBuilder::Erfc; -%unignore xla::swig::LocalComputationBuilder::Erf; -%unignore xla::swig::LocalComputationBuilder::ErfInv; -%unignore xla::swig::LocalComputationBuilder::Lgamma; -%unignore xla::swig::LocalComputationBuilder::Digamma; -%unignore xla::swig::LocalComputationBuilder::Acos; -%unignore xla::swig::LocalComputationBuilder::Asin; -%unignore xla::swig::LocalComputationBuilder::Atan; -%unignore xla::swig::LocalComputationBuilder::Tan; -%unignore xla::swig::LocalComputationBuilder::Acosh; -%unignore xla::swig::LocalComputationBuilder::Asinh; -%unignore xla::swig::LocalComputationBuilder::Atanh; -%unignore xla::swig::LocalComputationBuilder::Cosh; -%unignore xla::swig::LocalComputationBuilder::Sinh; -%unignore xla::swig::LocalComputationBuilder::Real; -%unignore xla::swig::LocalComputationBuilder::Imag; -%unignore xla::swig::LocalComputationBuilder::Conj; -%unignore xla::swig::LocalComputationBuilder::Complex; -%unignore xla::swig::LocalComputationBuilder::Cholesky; -%unignore xla::swig::LocalComputationBuilder::QR; -%unignore xla::swig::LocalComputationBuilder::TriangularSolve; -%unignore xla::swig::LocalComputationBuilder::CustomCall; -%unignore xla::swig::LocalComputationBuilder::Gather; -%unignore xla::swig::LocalComputationBuilder::Scatter; -%unignore xla::swig::DeleteLocalComputation; +%unignore xla::swig::ComputationBuilder; +%unignore xla::swig::ComputationBuilder::ComputationBuilder; +%unignore xla::swig::ComputationBuilder::Build; +%unignore xla::swig::ComputationBuilder::BuildWithRoot; +%unignore xla::swig::ComputationBuilder::SetOpMetadata; +%unignore xla::swig::ComputationBuilder::ClearOpMetadata; +%unignore xla::swig::ComputationBuilder::Parameter; +%unignore xla::swig::ComputationBuilder::GetShape; +%unignore xla::swig::ComputationBuilder::GetReturnValueShape; +%unignore xla::swig::ComputationBuilder::Infeed; +%unignore xla::swig::ComputationBuilder::Outfeed; 
+%unignore xla::swig::ComputationBuilder::ConstantLiteral; +%unignore xla::swig::ComputationBuilder::ConstantR0; +%unignore xla::swig::ComputationBuilder::Iota; +%unignore xla::swig::ComputationBuilder::BroadcastedIota; +%unignore xla::swig::ComputationBuilder::Broadcast; +%unignore xla::swig::ComputationBuilder::BroadcastInDim; +%unignore xla::swig::ComputationBuilder::Pad; +%unignore xla::swig::ComputationBuilder::Reshape; +%unignore xla::swig::ComputationBuilder::Collapse; +%unignore xla::swig::ComputationBuilder::AllToAll; +%unignore xla::swig::ComputationBuilder::CrossReplicaSum; +%unignore xla::swig::ComputationBuilder::Slice; +%unignore xla::swig::ComputationBuilder::SliceInDim; +%unignore xla::swig::ComputationBuilder::DynamicSlice; +%unignore xla::swig::ComputationBuilder::DynamicUpdateSlice; +%unignore xla::swig::ComputationBuilder::ConcatInDim; +%unignore xla::swig::ComputationBuilder::SelectAndScatterWithGeneralPadding; +%unignore xla::swig::ComputationBuilder::Select; +%unignore xla::swig::ComputationBuilder::Tuple; +%unignore xla::swig::ComputationBuilder::GetTupleElement; +%unignore xla::swig::ComputationBuilder::ConvertElementType; +%unignore xla::swig::ComputationBuilder::BitcastConvertType; +%unignore xla::swig::ComputationBuilder::Call; +%unignore xla::swig::ComputationBuilder::Transpose; +%unignore xla::swig::ComputationBuilder::Rev; +%unignore xla::swig::ComputationBuilder::Clamp; +%unignore xla::swig::ComputationBuilder::Map; +%unignore xla::swig::ComputationBuilder::Reduce; +%unignore xla::swig::ComputationBuilder::ReduceWindowWithGeneralPadding; +%unignore xla::swig::ComputationBuilder::RngNormal; +%unignore xla::swig::ComputationBuilder::RngUniform; +%unignore xla::swig::ComputationBuilder::RngBernoulli; +%unignore xla::swig::ComputationBuilder::While; +%unignore xla::swig::ComputationBuilder::Conditional; +%unignore xla::swig::ComputationBuilder::IsConstant; +%unignore xla::swig::ComputationBuilder::Eq; +%unignore 
xla::swig::ComputationBuilder::Ne; +%unignore xla::swig::ComputationBuilder::Ge; +%unignore xla::swig::ComputationBuilder::Gt; +%unignore xla::swig::ComputationBuilder::Lt; +%unignore xla::swig::ComputationBuilder::Le; +%unignore xla::swig::ComputationBuilder::Dot; +%unignore xla::swig::ComputationBuilder::DotGeneral; +%unignore xla::swig::ComputationBuilder::ConvGeneralDilated; +%unignore xla::swig::ComputationBuilder::Add; +%unignore xla::swig::ComputationBuilder::Sub; +%unignore xla::swig::ComputationBuilder::Mul; +%unignore xla::swig::ComputationBuilder::Div; +%unignore xla::swig::ComputationBuilder::Rem; +%unignore xla::swig::ComputationBuilder::Max; +%unignore xla::swig::ComputationBuilder::Min; +%unignore xla::swig::ComputationBuilder::And; +%unignore xla::swig::ComputationBuilder::Or; +%unignore xla::swig::ComputationBuilder::Xor; +%unignore xla::swig::ComputationBuilder::ShiftLeft; +%unignore xla::swig::ComputationBuilder::ShiftRightArithmetic; +%unignore xla::swig::ComputationBuilder::ShiftRightLogical; +%unignore xla::swig::ComputationBuilder::Not; +%unignore xla::swig::ComputationBuilder::Abs; +%unignore xla::swig::ComputationBuilder::Exp; +%unignore xla::swig::ComputationBuilder::Expm1; +%unignore xla::swig::ComputationBuilder::Floor; +%unignore xla::swig::ComputationBuilder::Ceil; +%unignore xla::swig::ComputationBuilder::Round; +%unignore xla::swig::ComputationBuilder::Log; +%unignore xla::swig::ComputationBuilder::Log1p; +%unignore xla::swig::ComputationBuilder::Sign; +%unignore xla::swig::ComputationBuilder::Cos; +%unignore xla::swig::ComputationBuilder::Sin; +%unignore xla::swig::ComputationBuilder::Tanh; +%unignore xla::swig::ComputationBuilder::Atan2; +%unignore xla::swig::ComputationBuilder::IsFinite; +%unignore xla::swig::ComputationBuilder::Pow; +%unignore xla::swig::ComputationBuilder::Neg; +%unignore xla::swig::ComputationBuilder::Sort; +%unignore xla::swig::ComputationBuilder::SortKeyVal; +%unignore xla::swig::ComputationBuilder::Sqrt; 
+%unignore xla::swig::ComputationBuilder::Rsqrt; +%unignore xla::swig::ComputationBuilder::Square; +%unignore xla::swig::ComputationBuilder::Reciprocal; +%unignore xla::swig::ComputationBuilder::Erfc; +%unignore xla::swig::ComputationBuilder::Erf; +%unignore xla::swig::ComputationBuilder::ErfInv; +%unignore xla::swig::ComputationBuilder::Lgamma; +%unignore xla::swig::ComputationBuilder::Digamma; +%unignore xla::swig::ComputationBuilder::Acos; +%unignore xla::swig::ComputationBuilder::Asin; +%unignore xla::swig::ComputationBuilder::Atan; +%unignore xla::swig::ComputationBuilder::Tan; +%unignore xla::swig::ComputationBuilder::Acosh; +%unignore xla::swig::ComputationBuilder::Asinh; +%unignore xla::swig::ComputationBuilder::Atanh; +%unignore xla::swig::ComputationBuilder::Cosh; +%unignore xla::swig::ComputationBuilder::Sinh; +%unignore xla::swig::ComputationBuilder::Real; +%unignore xla::swig::ComputationBuilder::Imag; +%unignore xla::swig::ComputationBuilder::Conj; +%unignore xla::swig::ComputationBuilder::Complex; +%unignore xla::swig::ComputationBuilder::Cholesky; +%unignore xla::swig::ComputationBuilder::QR; +%unignore xla::swig::ComputationBuilder::TriangularSolve; +%unignore xla::swig::ComputationBuilder::CustomCall; +%unignore xla::swig::ComputationBuilder::Gather; +%unignore xla::swig::ComputationBuilder::Scatter; +%unignore xla::swig::DeleteComputation; %unignore xla::swig::DestructureLocalShapedBufferTuple; %unignore xla::swig::DestructureXrtAllocationTuple; %unignore xla::swig::DeleteLocalShapedBuffer; %unignore xla::swig::DeleteXrtAllocation; -%unignore xla::swig::DeleteCompiledLocalComputation; -%unignore xla::swig::DeleteCompiledXrtComputation; +%unignore xla::swig::DeleteLocalExecutable; +%unignore xla::swig::DeleteXrtExecutable; %thread; %include "tensorflow/compiler/xla/python/local_computation_builder.h" diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index d13bc73b25..e8379a677a 100644 --- 
a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -36,7 +36,7 @@ from tensorflow.compiler.xla.service import hlo_pb2 # Most functions are snake_case for consistency with other modules, whereas -# method names of ComputationBuilder and LocalComputation are CamelCase for +# method names of ComputationBuilder and Computation are CamelCase for # consistency with XLA. # pylint: disable=invalid-name @@ -112,8 +112,8 @@ class XlaLocalBackend(Backend): return c_computation.Compile(argument_shapes, compile_options) def delete_executable(self, executable): - assert isinstance(executable, c_api.CompiledLocalComputation) - c_api.DeleteCompiledLocalComputation(executable) + assert isinstance(executable, c_api.LocalExecutable) + c_api.DeleteLocalExecutable(executable) def execute(self, executable, args): return executable.Execute(args) @@ -150,8 +150,8 @@ class XrtBackend(Backend): _maybe_encode_string(self.target)) def delete_executable(self, executable): - assert isinstance(executable, c_api.CompiledXrtComputation) - c_api.DeleteCompiledXrtComputation(executable) + assert isinstance(executable, c_api.XrtExecutable) + c_api.DeleteXrtExecutable(executable) def execute(self, executable, args): return executable.Execute(args) @@ -629,28 +629,24 @@ def transfer_from_outfeed(shape, replica_number=None): return c_api.TransferFromOutfeedLocalReplica(shape, replica_number or 0) -class LocalComputation(object): +class Computation(object): """Python wrapper for a local XLA Computation. - A LocalComputation can be executed if it is compiled. Otherwise, it + A Computation can be compiled to form an Executable. Otherwise, it can still be used as a Computation where required by the ComputationBuilder methods. 
""" - def __init__(self, c_computation, is_compiled, backend=XLA_LOCAL_BACKEND): + def __init__(self, c_computation, backend=XLA_LOCAL_BACKEND): self._c_computation = c_computation self._backend = backend - self._is_compiled = is_compiled @property def computation(self): - if self._is_compiled: - raise ValueError( - 'Attempt to read the XLA computation of a compiled LocalComputation.') return self._c_computation def GetProto(self): - """Get the HloModuleProto proto object in this local computation. + """Get the HloModuleProto proto object in this computation. Returns: An HloModuleProto proto object that has the whole-graph information. @@ -660,29 +656,21 @@ class LocalComputation(object): return proto def Compile(self, argument_shapes=(), compile_options=None, layout_fn=None): - """Compiles an un-compiled local computation. - - Local computations are the result of a "LocalComputationBuild'ing" process - -- they start in uncompiled form, and via a call to Compile() turn into a - compiled local computation. + """Compiles a computation. - Raises: - ValueError: if this is already a compiled local computation. + Computations are the result of a "ComputationBuild'ing" process. Arguments: argument_shapes: parameter shapes -- they are first laid out by layout_fn if layout_fn is provided. Otherwise, the default layout for those shapes will be used. - compile_options: options to use for compilation, includes an optional - laid out result shape for the computation. + compile_options: options to use for compilation, includes an optional laid + out result shape for the computation. layout_fn: lambda that is used to lay out the argument/result shapes. Returns: - A newly *compiled* local computation instance. + A Executable instance. 
""" - if self._is_compiled: - raise ValueError('Attempt to compile a compiled local XLA computation.') - result_shape = _wrap_shape(self.computation.GetReturnValueShape()) if layout_fn: @@ -697,7 +685,7 @@ class LocalComputation(object): compile_options.result_shape = result_shape c = self._backend.compile(self.computation, argument_shapes, compile_options) - return LocalComputation(c, is_compiled=True, backend=self._backend) + return Executable(c, backend=self._backend) def CompileWithExampleArguments(self, arguments=(), @@ -716,12 +704,26 @@ class LocalComputation(object): def GetReturnValueShape(self): return _wrap_shape(self._c_computation.GetReturnValueShape()) + def __del__(self): + # Python may have freed c_api first. + if c_api and self._c_computation: + assert isinstance(self._c_computation, c_api.Computation) + c_api.DeleteComputation(self._c_computation) + + +class Executable(object): + """Python wrapper for an XLA Executable.""" + + def __init__(self, c_executable, backend=XLA_LOCAL_BACKEND): + self._c_executable = c_executable + self._backend = backend + def Execute(self, arguments=(), check_for_deleted_args=True): """Execute on one replica with LocalBuffer arguments and return value.""" if check_for_deleted_args and any(arg.is_deleted() for arg in arguments): raise ValueError('Executing with deleted local buffer argument') raw_args = [arg.c_buffer for arg in arguments] - output_buffer = self._backend.execute(self._c_computation, raw_args) + output_buffer = self._backend.execute(self._c_executable, raw_args) return LocalBuffer(output_buffer, backend=self._backend, replica=0) def ExecutePerReplica(self, arguments=None): @@ -736,8 +738,6 @@ class LocalComputation(object): a shallow sequence of arguments was passed in for `arguments`, then the sole, zero'th replica's output is returned instead, as a LocalBuffer. 
""" - if not self._is_compiled: - raise ValueError('Cannot execute an uncompiled local XLA computation.') if arguments is None: arguments = ((),) * get_replica_count() else: @@ -759,8 +759,8 @@ class LocalComputation(object): ] # Execute - output_buffers = self._backend.execute_replicated( - self._c_computation, stripped_args) + output_buffers = self._backend.execute_replicated(self._c_executable, + stripped_args) # Wrap output handles in LocalBuffer instances return tuple( @@ -789,12 +789,8 @@ class LocalComputation(object): def __del__(self): # Python may have freed c_api first. - if c_api and self._c_computation: - if self._is_compiled: - self._backend.delete_executable(self._c_computation) - else: - assert isinstance(self._c_computation, c_api.LocalComputation) - c_api.DeleteLocalComputation(self._c_computation) + if c_api and self._c_executable: + self._backend.delete_executable(self._c_executable) def _make_replica_group_proto(replica_group): @@ -807,8 +803,8 @@ class ComputationBuilder(object): """XLA computation builder. Enqueues XLA ops in sequence and in order to build a - LocalComputation, which in turn can be compiled into a - CompiledLocalComputation, which in turn can be locally executed. + Computation, which in turn can be compiled into a + LocalExecutable, which in turn can be locally executed. 
""" # The methods of this class map 1-to-1 onto the XLA C++ @@ -819,16 +815,14 @@ class ComputationBuilder(object): # pylint: disable=g-doc-args def __init__(self, name): - self._client = c_api.LocalComputationBuilder(name.encode('utf8')) + self._client = c_api.ComputationBuilder(name.encode('utf8')) self._parameter_numbering = itertools.count() def Build(self, root=None, backend=XLA_LOCAL_BACKEND): if root is not None: - return LocalComputation( - self._client.BuildWithRoot(root), is_compiled=False, backend=backend) + return Computation(self._client.BuildWithRoot(root), backend=backend) else: - return LocalComputation( - self._client.Build(), is_compiled=False, backend=backend) + return Computation(self._client.Build(), backend=backend) def SetOpMetadata(self, op_metadata): """Set metadata for operations that are about to be enqueued.""" @@ -1480,7 +1474,7 @@ class ComputationBuilder(object): Args: operand: a LocalOp to test. - Returns: a LocalComputation that is rooted on the given `operand` which is a + Returns: a Computation that is rooted on the given `operand` which is a compile-time constant. """ return self._client.BuildConstantSubGraph(operand) @@ -1681,7 +1675,7 @@ def _forward_methods_to_local_builder(): Set up methods, corresponding to unary and binary XLA operations, whose calls are forwarded in a boilerplate manner to the underlying - LocalComputationBuilder C-extension API. + ComputationBuilder C-extension API. 
""" def forward_to_local_builder_with_handles(target_method, is_binop=False): @@ -1701,13 +1695,13 @@ def _forward_methods_to_local_builder(): for method_name in _UNARY_OPS: forward = forward_to_local_builder_with_handles( - getattr(c_api.LocalComputationBuilder, method_name)) + getattr(c_api.ComputationBuilder, method_name)) forward.__name__ = method_name setattr(ComputationBuilder, method_name, forward) for method_name in _BINARY_OPS: forward = forward_to_local_builder_with_handles( - getattr(c_api.LocalComputationBuilder, method_name), is_binop=True) + getattr(c_api.ComputationBuilder, method_name), is_binop=True) forward.__name__ = method_name setattr(ComputationBuilder, method_name, forward) diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py index aa38c06cf9..f830cb26e3 100644 --- a/tensorflow/compiler/xla/python/xla_client_test.py +++ b/tensorflow/compiler/xla/python/xla_client_test.py @@ -29,7 +29,7 @@ from tensorflow.compiler.xla.python import xla_client import unittest -class LocalComputationTest(unittest.TestCase): +class ComputationTest(unittest.TestCase): """Base class for running an XLA Computation through the local client.""" def _NewComputation(self, name=None): @@ -85,7 +85,7 @@ def NumpyArrayBool(*args, **kwargs): return np.array(*args, dtype=np.bool, **kwargs) -class ComputationsWithConstantsTest(LocalComputationTest): +class ComputationsWithConstantsTest(ComputationTest): """Tests focusing on Constant ops.""" def testConstantScalarSumS8(self): @@ -304,7 +304,7 @@ class ComputationsWithConstantsTest(LocalComputationTest): self._ExecuteAndCompareClose(c, expected=0.75) -class ParametersTest(LocalComputationTest): +class ParametersTest(ComputationTest): """Tests focusing on Parameter ops and argument-passing.""" def setUp(self): @@ -384,7 +384,7 @@ class ParametersTest(LocalComputationTest): expected=[-4.3, 1.3, -6.3, 3.3]) -class LocalBufferTest(LocalComputationTest): +class 
LocalBufferTest(ComputationTest): """Tests focusing on execution with LocalBuffers.""" def _Execute(self, c, arguments): @@ -482,7 +482,7 @@ class LocalBufferTest(LocalComputationTest): self.assertEqual(np.dtype(xla_shape.element_type()), np.dtype(np.float32)) -class SingleOpTest(LocalComputationTest): +class SingleOpTest(ComputationTest): """Tests for single ops. The goal here is smoke testing - to exercise the most basic functionality of @@ -1175,7 +1175,7 @@ class SingleOpTest(LocalComputationTest): np.testing.assert_allclose(g, expected, rtol=1e-4) -class EmbeddedComputationsTest(LocalComputationTest): +class EmbeddedComputationsTest(ComputationTest): """Tests for XLA graphs with embedded computations (such as maps).""" def _CreateConstantS32Computation(self): @@ -1639,7 +1639,7 @@ class EmbeddedComputationsTest(LocalComputationTest): self._ExecuteAndCompareClose(c, expected=expected) -class ErrorTest(LocalComputationTest): +class ErrorTest(ComputationTest): def setUp(self): self.f32_scalar_2 = NumpyArrayF32(2.0) @@ -1656,7 +1656,7 @@ class ErrorTest(LocalComputationTest): lambda: c.Build().CompileWithExampleArguments([self.f32_scalar_2])) -class ComputationRootTest(LocalComputationTest): +class ComputationRootTest(ComputationTest): """Tests related to setting the root of the computation.""" def testComputationRootDifferentFromLastOp(self): -- GitLab From a06adad826c41c689045e7e6cff72f5030b0b6f0 Mon Sep 17 00:00:00 2001 From: Taylor Robie Date: Wed, 13 Feb 2019 09:49:24 -0800 Subject: [PATCH 042/351] Lift keras eager execution into separate FuncGraphs. 
PiperOrigin-RevId: 233772591 --- tensorflow/python/eager/def_function.py | 6 +- tensorflow/python/eager/lift_to_graph.py | 214 +++++++++++++++--- tensorflow/python/eager/wrap_function.py | 3 +- tensorflow/python/keras/backend.py | 138 ++++++++--- .../engine/distributed_training_utils.py | 41 ++-- 5 files changed, 315 insertions(+), 87 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index 7b8ceb979b..a82b14ec2e 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -162,7 +162,7 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable): placeholder_ops = set( [tensor.op for tensor in function_placeholders]) lifted_initializer = lift_to_graph.lift_to_graph( - initial_value, outer_graph, + [initial_value], outer_graph, disallowed_placeholders=placeholder_ops)[initial_value] with ops.init_scope(): self._initial_value = lifted_initializer @@ -497,7 +497,7 @@ class Function(object): # Ignore variables which are already initialized at trace time. 
continue v.assign(lift_to_graph.lift_to_graph( - init, ops.get_default_graph())[init]) + [init], ops.get_default_graph())[init]) with ops.init_scope(): return initialize_variables.get_concrete_function()() @@ -538,7 +538,7 @@ class Function(object): def initialize_variables(): for v, init in initializer_map.items(): v.assign(lift_to_graph.lift_to_graph( - init, ops.get_default_graph())[init]) + [init], ops.get_default_graph())[init]) return initialize_variables.get_concrete_function() diff --git a/tensorflow/python/eager/lift_to_graph.py b/tensorflow/python/eager/lift_to_graph.py index d2ccdd5a41..e7c93481d3 100644 --- a/tensorflow/python/eager/lift_to_graph.py +++ b/tensorflow/python/eager/lift_to_graph.py @@ -21,8 +21,10 @@ from __future__ import print_function import collections +from tensorflow.python.framework import func_graph from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import resource_variable_ops def _graph_inputs(op): @@ -40,49 +42,191 @@ class UnliftableError(Exception): pass -def lift_to_graph(init_tensor, graph, sources=None, - disallowed_placeholders=None): - """Copies the tensor and all its inputs recursively to the outer graph. +def _constant_inputs(op_or_tensor): + return all(_as_operation(i).type == u"Const" + and not _as_operation(i).control_inputs + for i in _graph_inputs(_as_operation(op_or_tensor))) - Args: - init_tensor: The Tensor to lift. - graph: The graph to lift to. - sources: Optional sequence of nodes to start from. If omitted the whole - subgraph which feeds into `init_tensor` is lifted. + +def _map_subgraph(init_tensor, sources, disallowed_placeholders, visited_ops, + op_outputs, add_sources): + """Walk a Graph and capture the subgraph between init_tensor and sources. + + Note: This function mutates visited_ops and op_outputs. + + Arguments: + init_tensor: A Tensor or Operation where the subgraph terminates. 
+ sources: A set of Tensors where subgraph extraction should stop. disallowed_placeholders: An optional set of ops which may not appear in the lifted graph. Defaults to all placeholders. + visited_ops: A set of operations which were visited in a prior pass. + op_outputs: A defaultdict containing the outputs of an op which are to be + copied into the new subgraph. + add_sources: A boolean indicating whether placeholders which are not in + sources should be allowed. Returns: - A mapping from ops in the current default graph to ops in `graph`. + The set of placeholders upon which init_tensor depends and are not in + sources. Raises: - UnliftableError: If a placeholder blocks lifting. + UnliftableError: if init_tensor depends on a placeholder which is not in + sources and add_sources is False. """ - # Check that the initializer does not depend on any placeholders. - if sources is None: - sources = set([]) - visited_ops = set([x.op for x in sources]) ops_to_visit = [_as_operation(init_tensor)] - op_outputs = collections.defaultdict(set) + extra_sources = set() while ops_to_visit: op = ops_to_visit.pop() if op in visited_ops: continue visited_ops.add(op) - if ((disallowed_placeholders is not None and op in disallowed_placeholders) - or (disallowed_placeholders is None and op.type == "Placeholder")): + + should_raise = False + if disallowed_placeholders is not None and op in disallowed_placeholders: + should_raise = True + elif op.type == "Placeholder": + if disallowed_placeholders is None and not add_sources: + should_raise = True + extra_sources.update(op.outputs) + + if should_raise: raise UnliftableError( "Unable to lift tensor", init_tensor, "because it depends transitively on placeholder ", op) + for inp in _graph_inputs(op): op_outputs[inp].add(op) - if inp not in visited_ops and inp not in sources: + if inp not in visited_ops and inp not in (sources or extra_sources): ops_to_visit.append(inp) + + return extra_sources + + +def _copy_non_source(op, graph, op_map): + 
"""Copy an op directly to a given graph. + + This function assumes that all of the inputs to an op have already been + copied. + + Args: + op: The op to be copied. + graph: The destination graph. + op_map: A dict mapping ops and tensors in the old graph to the new one. + """ + copied_inputs = [op_map[x] for x in op.inputs] + copied_control_inputs = [op_map[x] for x in op.control_inputs] + with ops.control_dependencies(copied_control_inputs), ops.device(op.device): + copied_op = graph.create_op( + op_type=op.type, + inputs=copied_inputs, + dtypes=[x.dtype for x in op.outputs], + attrs=op.node_def.attr, + name=op.name) + op_map[op] = copied_op + for i, o in enumerate(op.outputs): + op_map[o] = copied_op.outputs[i] + + +def _copy_source(s, graph, op_map, handle_captures, inverse_captures): + """Create a source in a graph based on a Tensor from a different graph. + + This function creates a placeholder analog of `s` in a graph with the + following behavior: + + 1) If s is a captured Tensor or Variable and handle_captures is set to True, + simply capture it in the new graph as well. + + 2) If s is a PlaceholderWithDefault whose default is a constant, preserve + said default in the new graph. + + 3) When applicable, copy resource variable metadata from `s` to the newly + created placeholder. + + Args: + s: The source of interest. + graph: The destination graph. + op_map: A dict mapping ops and tensors in the old graph to the new one. + handle_captures: A boolean indicating whether to re-capture s in the new + graph or simply create a vanilla placeholder. + inverse_captures: A dict mapping s back to the Tensor or Variable that it + captures. + """ + if handle_captures and s in inverse_captures: + copied_placeholder = graph.capture(inverse_captures[s], name=s.op.name) + elif s.op.type == "PlaceholderWithDefault" and _constant_inputs(s): + # Copy the default value to the graph. 
+ default_value = s.op.inputs[0] + _copy_non_source(op=default_value.op, graph=graph, op_map=op_map) + + with ops.device(s.op.device): + copied_placeholder = array_ops.placeholder_with_default( + input=op_map[default_value], shape=s.shape, name=s.op.name) + else: + with ops.device(s.op.device): + copied_placeholder = array_ops.placeholder( + dtype=s.dtype, shape=s.shape, name=s.op.name) + + base_handle = resource_variable_ops.get_resource_handle_data(s) + if base_handle.shape_and_type: + resource_variable_ops._set_handle_shapes_and_types( # pylint: disable=protected-access + copied_placeholder, + base_handle, + graph_mode=True) + + op_map[s] = copied_placeholder + + +def lift_to_graph(init_tensors, graph, sources=None, + disallowed_placeholders=None, add_sources=False, + handle_captures=False, base_graph=None): + """Copies the tensor and all its inputs recursively to the outer graph. + + Args: + init_tensors: The Tensor to lift. + graph: The graph to lift to. + sources: Optional sequence of nodes to start from. If omitted the whole + subgraph which feeds into `init_tensor` is lifted. + disallowed_placeholders: An optional set of ops which may not appear in the + lifted graph. Defaults to all placeholders. + add_sources: A boolean indicating whether placeholders which are not in + sources should be allowed. + handle_captures: A boolean indicating whether to re-capture s in the new + graph or simply create a vanilla placeholder. + base_graph: The graph from which to lift ops. This will be inferred if not + specified. + + Returns: + A mapping from ops in the current default graph to ops in `graph`. + + Raises: + UnliftableError: If a placeholder blocks lifting. 
+ """ + variable_init_tensors = {i for i in init_tensors if isinstance( + i, resource_variable_ops.ResourceVariable)} + init_tensors = set(init_tensors).difference(variable_init_tensors) + base_graph = base_graph or list(init_tensors)[0].graph + + # Check that the initializer does not depend on any placeholders. + sources = set(sources or []) + visited_ops = set([x.op for x in sources]) + op_outputs = collections.defaultdict(set) + + # First we extract the subgraph between init_tensors and sources. + for init_tensor in init_tensors: + sources.update(_map_subgraph( + init_tensor=init_tensor, + sources=sources, + disallowed_placeholders=disallowed_placeholders, + visited_ops=visited_ops, + op_outputs=op_outputs, + add_sources=add_sources)) + # Topologically sort the nodes we've extracted. Now we know how many of their # outputs are part of this subgraph. ops_to_copy = [] marked_ops = set([]) - ops_to_visit = [_as_operation(init_tensor)] + ops_to_visit = [_as_operation(t) for t in init_tensors + if not op_outputs[_as_operation(t)]] while ops_to_visit: op = ops_to_visit.pop() if op in marked_ops: @@ -90,28 +234,34 @@ def lift_to_graph(init_tensor, graph, sources=None, marked_ops.add(op) ops_to_copy.append(op) for inp in _graph_inputs(op): - if all(x in marked_ops for x in op_outputs[inp]) and inp not in sources: + if (all(x in marked_ops for x in op_outputs[inp]) and + inp not in sources): ops_to_visit.append(inp) + + # When lifting from one FuncGraph to another, we will need to capture the + # relevant tensors as well. + captures = collections.OrderedDict() + if (isinstance(base_graph, func_graph.FuncGraph) and + isinstance(graph, func_graph.FuncGraph)): + captures = base_graph.captures + inverse_captures = {v: k for k, v in captures.items()} + # ops_to_copy now holds a reverse topologically sorted list of ops which # ends in the initializer. We copy those to the outermost graph and # build the initialization op there. 
with graph.as_default(): - op_map = {} + op_map = {i: i for i in variable_init_tensors} # Pass through variables. source_ops = set() for s in sources: source_ops.add(s.op) - op_map[s] = array_ops.placeholder(dtype=s.dtype, shape=s.shape, - name=s.op.name) + _copy_source(s=s, graph=graph, op_map=op_map, + handle_captures=handle_captures, + inverse_captures=inverse_captures) + for op in reversed(ops_to_copy): if op in source_ops: continue - copied_inputs = [op_map[x] for x in op.inputs] - copied_control_inputs = [op_map[x] for x in op.control_inputs] - with ops.control_dependencies(copied_control_inputs): - copied_op = graph.create_op( - op.type, copied_inputs, [x.dtype for x in op.outputs], - attrs=op.node_def.attr, name=op.name) - op_map[op] = copied_op - for i, o in enumerate(op.outputs): - op_map[o] = copied_op.outputs[i] + + _copy_non_source(op=op, graph=graph, op_map=op_map) + return op_map diff --git a/tensorflow/python/eager/wrap_function.py b/tensorflow/python/eager/wrap_function.py index 7b1de99406..88aa2d26a4 100644 --- a/tensorflow/python/eager/wrap_function.py +++ b/tensorflow/python/eager/wrap_function.py @@ -156,8 +156,7 @@ class WrappedFunction(function.ConcreteFunction): identity_fetches = [] sink_tensor = array_ops.zeros([]) lift_map = lift_to_graph.lift_to_graph( - sink_tensor, pruned_graph, - sources=flat_feeds + internal_captures) + [sink_tensor], pruned_graph, sources=flat_feeds + internal_captures) for original_fetch, identity_fetch in zip( tensor_fetches, identity_fetches): lift_map[original_fetch] = lift_map[identity_fetch] diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 5d52e33cd5..fc2e8aae68 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -37,6 +37,7 @@ from tensorflow.python.distribute import distribute_coordinator_context as dc_co from tensorflow.python.distribute import distribution_strategy_context from tensorflow.python.eager import context from 
tensorflow.python.eager import function as eager_function +from tensorflow.python.eager import lift_to_graph from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_module from tensorflow.python.framework import func_graph @@ -79,6 +80,9 @@ py_sum = sum # while executing eagerly (such as the functional API for model-building). _GRAPH = None +# A graph which is used for constructing functions in eager mode. +_CURRENT_SCRATCH_GRAPH = None + # This is a thread local object that will hold the default internal TF session # used by Keras. It can be set manually via `set_session(sess)`. _SESSION = threading.local() @@ -459,6 +463,40 @@ def get_graph(): return ops.get_default_graph() +@tf_contextlib.contextmanager +def _scratch_graph(graph=None): + """Retrieve a shared and temporary func graph. + + The eager execution path lifts a subgraph from the keras global graph into + a scratch graph in order to create a function. DistributionStrategies, in + turn, constructs multiple functions as well as a final combined function. In + order for that logic to work correctly, all of the functions need to be + created on the same scratch FuncGraph. + + Args: + graph: A graph to be used as the current scratch graph. If not set then + a scratch graph will either be retrieved or created: + + Yields: + The current scratch graph. + """ + global _CURRENT_SCRATCH_GRAPH + if (_CURRENT_SCRATCH_GRAPH is not None and graph is not None and + _CURRENT_SCRATCH_GRAPH is not graph): + raise ValueError('Multiple scratch graphs specified.') + + if _CURRENT_SCRATCH_GRAPH: + yield _CURRENT_SCRATCH_GRAPH + return + + graph = graph or func_graph.FuncGraph('keras_scratch_graph') + try: + _CURRENT_SCRATCH_GRAPH = graph + yield graph + finally: + _CURRENT_SCRATCH_GRAPH = None + + @keras_export('keras.backend.set_session') def set_session(session): """Sets the global TensorFlow session. 
@@ -3065,47 +3103,79 @@ class EagerExecutionFunction(object): """ def __init__(self, inputs, outputs, updates=None, name=None): + self.name = name + self._outputs_structure = outputs + inputs = nest.flatten(inputs) + outputs = nest.flatten(outputs) + updates = updates or [] if not isinstance(updates, (list, tuple)): raise TypeError('`updates` in a Keras backend function ' 'should be a list or tuple.') - self.name = name - self.inputs = nest.flatten(inputs) - self._outputs_structure = outputs - graph = get_graph() + + if updates and not outputs: + # Edge case; never happens in practice + raise ValueError('Cannot create a Keras backend function with updates' + ' but no outputs during eager execution.') + + graphs = {i.graph for i in nest.flatten([inputs, outputs, updates]) + if hasattr(i, 'graph')} + if len(graphs) > 1: + raise ValueError('Cannot create an execution function which is comprised ' + 'of elements from multiple graphs.') + + source_graph = graphs.pop() + global_graph = get_graph() + + updates_ops = [] + legacy_update_ops = [] + for update in updates: + # For legacy reasons it is allowed to pass an update as a tuple + # `(variable, new_value)` (this maps to an assign op). Otherwise it + # is assumed to already be an op -- we cannot control its execution + # order. + if isinstance(update, tuple): + legacy_update_ops.append(update) + else: + if hasattr(update, 'op'): + update = update.op + updates_ops.append(update) + + with _scratch_graph() as exec_graph: + global_graph = get_graph() + if source_graph not in (exec_graph, global_graph): + raise ValueError('Unknown graph. 
Aborting.') + + if source_graph is global_graph and exec_graph is not global_graph: + init_tensors = ( + outputs + updates_ops + [p for [p, _] in legacy_update_ops] + + [p_new for [_, p_new] in legacy_update_ops + if isinstance(p_new, ops.Tensor)]) + lifted_map = lift_to_graph.lift_to_graph( + init_tensors=init_tensors, graph=exec_graph, sources=inputs, + add_sources=True, handle_captures=True, base_graph=source_graph) + + inputs = [lifted_map[i] for i in inputs] + outputs = [lifted_map[i] for i in outputs] + updates_ops = [lifted_map[i] for i in updates_ops] + legacy_update_ops = [(lifted_map[p], lifted_map.get(p_new, p_new)) + for p, p_new in legacy_update_ops] + # Consolidate updates - with graph.as_default(): - self.outputs = cast_variables_to_tensor(nest.flatten(outputs)) - with ops.control_dependencies(self.outputs): - # In general, updates should be run after the outputs have been - # computed. However, we can only ensure this when we create - # the updates here (i.e. when updates are passed as tuples). - # We cannot modify the control dependencies of preexisting update ops. - updates_ops = [] - for update in updates: - # For legacy reasons it is allowed to pass an update as a tuple - # `(variable, new_value)` (this maps to an assign op). - if isinstance(update, tuple): - p, new_p = update - updates_ops.append(state_ops.assign(p, new_p)) - else: - # Assumed already an op -- we cannot control its execution order. 
- updates_ops.append(update) - - # We set the update ops to run at the end by conditioning it on output[0] - if updates and not self.outputs: - # Edge case; never happens in practice - raise ValueError('Cannot create a Keras backend function with updates' - ' but no outputs during eager execution.') + with exec_graph.as_default(): + outputs = cast_variables_to_tensor(outputs) + with ops.control_dependencies(outputs): + for p, p_new in legacy_update_ops: + updates_ops.append(state_ops.assign(p, p_new)) + + self.inputs, self.outputs = inputs, outputs with ops.control_dependencies(updates_ops): self.outputs[0] = array_ops.identity(self.outputs[0]) - # Prepare graph function - # TODO(fchollet): can we restrict `captures` to variables actually used in - # the relevant subgraph? - graph.inputs = self.inputs + list(graph.captures.values()) - graph.outputs = self.outputs - graph_fn = eager_function.ConcreteFunction(graph) + exec_graph.inputs = self.inputs + list(exec_graph.captures.values()) + exec_graph.outputs = self.outputs + graph_fn = eager_function.ConcreteFunction(exec_graph) + graph_fn._num_positional_args = len(self.inputs) graph_fn._arg_keywords = [] self._graph_fn = graph_fn @@ -3113,7 +3183,7 @@ class EagerExecutionFunction(object): # Handle placeholders with default # (treated as required placeholder by graph functions) self._placeholder_default_values = {} - with graph.as_default(): + with exec_graph.as_default(): for x in self.inputs: if x.op.type == 'PlaceholderWithDefault': self._placeholder_default_values[x] = tensor_util.constant_value( diff --git a/tensorflow/python/keras/engine/distributed_training_utils.py b/tensorflow/python/keras/engine/distributed_training_utils.py index de6968e103..76ddbbeefc 100644 --- a/tensorflow/python/keras/engine/distributed_training_utils.py +++ b/tensorflow/python/keras/engine/distributed_training_utils.py @@ -805,22 +805,31 @@ def _make_eager_execution_function(model, mode): # NOTE(priyag): Try creating a new FuncGraph 
within DS scope instead of using # the global one. strategy = model._distribution_strategy - with K.get_graph().as_default(), strategy.scope(): - # Create train ops on each of the devices when we call - # `_per_device_fit_function`. - (grouped_inputs, grouped_outputs) = strategy.extended.call_for_each_replica( - _per_device_function, args=(get_distributed_model(model, mode),)) - - # Unwrap all the per device values returned from `call_for_each_replica`. - # Unwrapping per device values gives you a list of values that can be - # used to construct a new train function that is composed of inptus/outputs - # on all the devices over which the model is distributed. - (all_inputs, all_outputs, _, _) = unwrap_values( - strategy, - grouped_inputs, - grouped_outputs, - with_loss_tensor=(mode != ModeKeys.PREDICT)) - + global_graph = K.get_graph() + + with global_graph.as_default(), strategy.scope(): + # First we gather the relevant portions of the model across all replicas. + # `K._scratch_graph(global_graph)` signals to Keras that it should not + # lift to a separate graph when creating the per-replica functions. + with K._scratch_graph(global_graph): + # Create train ops on each of the devices when we call + # `_per_device_fit_function`. + grouped = strategy.extended.call_for_each_replica( + _per_device_function, args=(get_distributed_model(model, mode),)) + grouped_inputs, grouped_outputs = grouped + + # Unwrap all the per device values returned from `call_for_each_replica`. + # Unwrapping per device values gives you a list of values that can be + # used to construct a new train function that is composed of + # inputs/outputs on all the devices over which the model is distributed. + (all_inputs, all_outputs, _, _) = unwrap_values( + strategy, + grouped_inputs, + grouped_outputs, + with_loss_tensor=(mode != ModeKeys.PREDICT)) + + # Finally, a joint Keras function is created; this one will be created in + # a separate FuncGraph. 
return K.function( all_inputs, all_outputs, -- GitLab From 24ed0299b4537ae2ffe4a8c2c7b7051f6d8b5494 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 13 Feb 2019 09:56:02 -0800 Subject: [PATCH 043/351] Feed asset paths in tf.save_model.load when loading 1.x SavedModels Adds a simple table test. PiperOrigin-RevId: 233773824 --- .../python/saved_model/load_v1_in_v2.py | 13 +++-- .../python/saved_model/load_v1_in_v2_test.py | 51 +++++++++++++++++++ tensorflow/python/saved_model/loader_impl.py | 4 +- 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/saved_model/load_v1_in_v2.py b/tensorflow/python/saved_model/load_v1_in_v2.py index b1d56dc915..bba20541ea 100644 --- a/tensorflow/python/saved_model/load_v1_in_v2.py +++ b/tensorflow/python/saved_model/load_v1_in_v2.py @@ -95,9 +95,16 @@ class _EagerSavedModelLoader(loader_impl.SavedModelLoader): with wrapped.graph.as_default(): init_op = loader_impl.get_init_op(meta_graph_def) if init_op is not None: - # TODO(allenl): Deal with assets - wrapped.prune(feeds=[], - fetches=[wrapped.graph.as_graph_element(init_op)])() + asset_feed_tensors = [] + asset_paths = [] + for tensor_name, value in loader_impl.get_asset_tensors( + self._export_dir, meta_graph_def).items(): + asset_feed_tensors.append(wrapped.graph.as_graph_element(tensor_name)) + asset_paths.append(tracking.TrackableAsset(value)) + init_fn = wrapped.prune( + feeds=asset_feed_tensors, + fetches=[wrapped.graph.as_graph_element(init_op)]) + init_fn(*[path.asset_path for path in asset_paths]) signature_functions = self._extract_signatures(wrapped, meta_graph_def) root = tracking.AutoCheckpointable() root.signatures = signature_serialization.create_signature_map( diff --git a/tensorflow/python/saved_model/load_v1_in_v2_test.py b/tensorflow/python/saved_model/load_v1_in_v2_test.py index 62e5e721b4..99d2495292 100644 --- a/tensorflow/python/saved_model/load_v1_in_v2_test.py +++ b/tensorflow/python/saved_model/load_v1_in_v2_test.py @@ -19,6 
+19,7 @@ from __future__ import division from __future__ import print_function import os +import shutil from tensorflow.python.client import session as session_lib from tensorflow.python.eager import backprop @@ -26,11 +27,14 @@ from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.lib.io import file_io from tensorflow.python.ops import array_ops +from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables from tensorflow.python.saved_model import builder_impl from tensorflow.python.saved_model import load +from tensorflow.python.saved_model import save from tensorflow.python.saved_model import signature_def_utils from tensorflow.python.saved_model import simple_save from tensorflow.python.saved_model import utils_impl @@ -149,6 +153,53 @@ class LoadTest(test.TestCase): self.evaluate(second_imported.signatures["second_key"]( second_start=constant_op.constant(2.)))) + def _v1_asset_saved_model(self): + export_graph = ops.Graph() + vocab_path = os.path.join(self.get_temp_dir(), "vocab.txt") + with open(vocab_path, "w") as f: + f.write("alpha\nbeta\ngamma\n") + with export_graph.as_default(): + initializer = lookup_ops.TextFileInitializer( + vocab_path, + key_dtype=dtypes.string, + key_index=lookup_ops.TextFileIndex.WHOLE_LINE, + value_dtype=dtypes.int64, + value_index=lookup_ops.TextFileIndex.LINE_NUMBER) + table = lookup_ops.HashTable( + initializer, default_value=-1) + start = array_ops.placeholder( + shape=None, dtype=dtypes.string, name="in") + output = table.lookup(start, name="out") + with session_lib.Session() as session: + session.run([table.initializer]) + path = os.path.join(self.get_temp_dir(), "saved_model", str(ops.uid())) + simple_save.simple_save( + session, + path, + inputs={"start": start}, + outputs={"output": 
output}, + legacy_init_op=table.initializer) + file_io.delete_file(vocab_path) + return path + + def test_asset_loading(self): + first_path = self._v1_asset_saved_model() + imported = load.load(first_path) + fn = imported.signatures["serving_default"] + self.assertAllClose({"output": [2, 0]}, + fn(start=constant_op.constant(["gamma", "alpha"]))) + second_path = os.path.join(self.get_temp_dir(), "saved_model", + str(ops.uid())) + save.save(imported, second_path, signatures=imported.signatures) + shutil.rmtree(first_path) + self.skipTest( + "TODO(b/124321570): save TrackableAssets and make re-saving initialize " + "correctly") + second_import = load.load(second_path) + fn = second_import.signatures["serving_default"] + self.assertAllClose({"output": [2, 0]}, + fn(start=constant_op.constant(["gamma", "alpha"]))) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/saved_model/loader_impl.py b/tensorflow/python/saved_model/loader_impl.py index e5be03aae4..bfabef9174 100644 --- a/tensorflow/python/saved_model/loader_impl.py +++ b/tensorflow/python/saved_model/loader_impl.py @@ -88,7 +88,7 @@ def parse_saved_model(export_dir): _parse_saved_model = parse_saved_model -def _get_asset_tensors(export_dir, meta_graph_def_to_load, import_scope=None): +def get_asset_tensors(export_dir, meta_graph_def_to_load, import_scope=None): """Gets the asset tensors, if defined in the meta graph def to load. Args: @@ -393,7 +393,7 @@ class SavedModelLoader(object): meta_graph_def = self.get_meta_graph_def_from_tags(tags) with sess.graph.as_default(): # Get asset tensors, if any. 
- asset_tensors_dictionary = _get_asset_tensors( + asset_tensors_dictionary = get_asset_tensors( self._export_dir, meta_graph_def, import_scope=import_scope) init_op = get_init_op(meta_graph_def, import_scope) -- GitLab From 6baa1f85beae5ce9410afed8666b355fe394cc3d Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 13 Feb 2019 10:00:06 -0800 Subject: [PATCH 044/351] Replace CHECK with an error status in BaseRemoteRendezvous::RecvAsync(). It should not be possible for RecvAsync to be called on an uninitialized BaseRemoteRendezvous, but some users have reported this happening, leading to server crashes. Replace it with an error status to enable recovery, and thread that error status back to the caller. PiperOrigin-RevId: 233774643 --- tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc | 5 ++++- tensorflow/core/distributed_runtime/graph_mgr.cc | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc index de6e4b4a7c..a642313275 100644 --- a/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc +++ b/tensorflow/core/distributed_runtime/base_rendezvous_mgr.cc @@ -293,8 +293,11 @@ void BaseRemoteRendezvous::RecvAsync(const ParsedKey& parsed, const Rendezvous::Args& recv_args, DoneCallback done) { VLOG(1) << "RemoteRendezvous Recv " << this << " " << parsed.FullKey(); - CHECK(is_initialized()) << "RecvAsync called when uninitialized."; Status s = ValidateDevices(parsed, false /*!is_src*/); + if (s.ok() && !is_initialized()) { + s.Update(errors::Internal( + "RecvAsync called when uninitialized (key:", parsed.FullKey(), ").")); + } if (!s.ok()) { done(s, Args(), recv_args, Tensor(), false); return; diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index 1065f021a1..144113a043 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ 
b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -356,6 +356,12 @@ Status GraphMgr::RecvOutputs(const int64 step_id, NamedTensors* out) { Rendezvous* rendezvous = worker_env_->rendezvous_mgr->Find(step_id); Status s = RecvOutputsFromRendezvous(rendezvous, out, Rendezvous::Args()); rendezvous->Unref(); + if (!s.ok()) { + // Failing to fetch the outputs should not be possible, so rewrite the error + // status to an INTERNAL error. + s = errors::Internal("Failed to fetch outputs for step ", step_id, + ". (Original error message: ", s.ToString(), ")"); + } return s; } -- GitLab From f085b878cea7e9580560d6e68a33ba9170a0c2fe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 10:03:59 -0800 Subject: [PATCH 045/351] Add collective all_gather method to unofficial python interface. Initial implementation is by ring-algorithm. Intended use is via distribution strategies. The implementation of all-gather is very similar to that of all-reduce, so common code is factored out into ring_alg.{h,cc}. 
PiperOrigin-RevId: 233775623 --- tensorflow/core/BUILD | 33 + .../base_api/api_def_CollectiveGather.pbtxt | 5 + .../base_collective_executor.cc | 30 +- .../common_runtime/base_collective_executor.h | 10 +- .../collective_param_resolver_local.cc | 2 + tensorflow/core/common_runtime/ring_alg.cc | 430 ++++++++++++ tensorflow/core/common_runtime/ring_alg.h | 124 ++++ .../core/common_runtime/ring_gatherer.cc | 266 +++++++ .../core/common_runtime/ring_gatherer.h | 51 ++ .../core/common_runtime/ring_gatherer_test.cc | 651 ++++++++++++++++++ .../core/common_runtime/ring_reducer.cc | 378 +--------- tensorflow/core/common_runtime/ring_reducer.h | 90 +-- .../core/common_runtime/ring_reducer_test.cc | 7 +- tensorflow/core/framework/collective.h | 1 + tensorflow/core/kernels/collective_ops.cc | 51 ++ tensorflow/core/ops/collective_ops.cc | 11 + .../python/framework/auto_control_deps.py | 1 + tensorflow/python/ops/collective_ops.py | 31 +- tensorflow/python/ops/collective_ops_test.py | 40 +- .../api/golden/v1/tensorflow.raw_ops.pbtxt | 4 + .../api/golden/v2/tensorflow.raw_ops.pbtxt | 4 + 21 files changed, 1747 insertions(+), 473 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_CollectiveGather.pbtxt create mode 100644 tensorflow/core/common_runtime/ring_alg.cc create mode 100644 tensorflow/core/common_runtime/ring_alg.h create mode 100644 tensorflow/core/common_runtime/ring_gatherer.cc create mode 100644 tensorflow/core/common_runtime/ring_gatherer.h create mode 100644 tensorflow/core/common_runtime/ring_gatherer_test.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index cc242d0e3c..b320a068fc 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2969,6 +2969,8 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/rendezvous_mgr.h", "common_runtime/rendezvous_util.h", "common_runtime/ring_reducer.h", + "common_runtime/ring_alg.h", + "common_runtime/ring_gatherer.h", "common_runtime/session_factory.h", 
"common_runtime/single_threaded_cpu_device.h", "common_runtime/stats_publisher_interface.h", @@ -3025,6 +3027,8 @@ tf_cuda_library( "common_runtime/renamed_device.cc", "common_runtime/rendezvous_mgr.cc", "common_runtime/rendezvous_util.cc", + "common_runtime/ring_alg.cc", + "common_runtime/ring_gatherer.cc", "common_runtime/ring_reducer.cc", "common_runtime/session.cc", "common_runtime/session_factory.cc", @@ -3996,6 +4000,35 @@ tf_cc_tests_gpu( ], ) +tf_cc_tests_gpu( + name = "ring_gatherer_test", + size = "medium", + srcs = [ + "common_runtime/ring_gatherer_test.cc", + ], + linkstatic = tf_kernel_tests_linkstatic(), + tags = tf_cuda_tests_tags(), + deps = [ + ":all_kernels", + ":core", + ":core_cpu", + ":core_cpu_internal", + ":direct_session_internal", + ":framework", + ":framework_internal", + ":gpu_runtime", + ":lib", + ":lib_internal", + ":ops", + ":protos_all_cc", + ":protos_test_cc", + ":test", + ":test_main", + ":testlib", + "@com_google_absl//absl/memory", + ], +) + tf_cc_tests_gpu( name = "hierarchical_tree_broadcaster_test", size = "medium", diff --git a/tensorflow/core/api_def/base_api/api_def_CollectiveGather.pbtxt b/tensorflow/core/api_def/base_api/api_def_CollectiveGather.pbtxt new file mode 100644 index 0000000000..3cd833b945 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_CollectiveGather.pbtxt @@ -0,0 +1,5 @@ +op { + graph_op_name: "CollectiveGather" + summary: "Mutually accumulates multiple tensors of identical type and shape." + visibility: HIDDEN +} diff --git a/tensorflow/core/common_runtime/base_collective_executor.cc b/tensorflow/core/common_runtime/base_collective_executor.cc index 8870a5399c..c9e3cf4086 100644 --- a/tensorflow/core/common_runtime/base_collective_executor.cc +++ b/tensorflow/core/common_runtime/base_collective_executor.cc @@ -63,7 +63,7 @@ int64 CollectiveAdapter::AlignedChunkElts(int64 elt_bytes, int64 total_elts, (chunk_bytes < EIGEN_MAX_ALIGN_BYTES) ? 
(EIGEN_MAX_ALIGN_BYTES - chunk_bytes) : (EIGEN_MAX_ALIGN_BYTES - (chunk_bytes % EIGEN_MAX_ALIGN_BYTES)); - CHECK_EQ(0, diff % elt_bytes); + DCHECK_EQ(0, diff % elt_bytes); base_chunk_elts += (diff / elt_bytes); DCHECK_EQ(0, ((base_chunk_elts * elt_bytes) % EIGEN_MAX_ALIGN_BYTES)) << "total_elts=" << total_elts << " num_chunks=" << num_chunks @@ -78,17 +78,23 @@ class CollectiveAdapterImpl : public CollectiveAdapter { public: // Takes ownership of output and prepares to properly alias its chunks. // Ownership is taken because the shape may temporarily change. - CollectiveAdapterImpl(Tensor* output, int64 num_chunks, Allocator* allocator) + CollectiveAdapterImpl(Tensor* output, int64 num_chunks, Allocator* allocator, + bool align_chunks) : output_(std::move(*output)), dt_(output_.dtype()), old_shape_(output_.shape()), num_chunks_(num_chunks), allocator_(allocator), total_elts_(output_.NumElements()), - chunk_elts_(AlignedChunkElts(sizeof(T), total_elts_, num_chunks_)), + chunk_elts_(align_chunks + ? 
AlignedChunkElts(sizeof(T), total_elts_, num_chunks_) + : total_elts_ / num_chunks_), data_start_(reinterpret_cast<T*>(DMAHelper::base(&output_))), data_end_(data_start_ + total_elts_) { - CHECK_GT(chunk_elts_, 0); + if (!align_chunks) { + DCHECK_EQ(total_elts_, num_chunks_ * chunk_elts_); + } + DCHECK_GT(chunk_elts_, 0); Flatten(); } @@ -176,19 +182,24 @@ class CollectiveAdapterImpl : public CollectiveAdapter { } // namespace CollectiveAdapter* MakeCollectiveAdapter(Tensor* output, int num_chunks, - Allocator* allocator) { + Allocator* allocator, + bool align_chunks) { switch (output->dtype()) { case DT_FLOAT: - return new CollectiveAdapterImpl<float>(output, num_chunks, allocator); + return new CollectiveAdapterImpl<float>(output, num_chunks, allocator, + align_chunks); break; case DT_DOUBLE: - return new CollectiveAdapterImpl<double>(output, num_chunks, allocator); + return new CollectiveAdapterImpl<double>(output, num_chunks, allocator, + align_chunks); break; case DT_INT32: - return new CollectiveAdapterImpl<int32>(output, num_chunks, allocator); + return new CollectiveAdapterImpl<int32>(output, num_chunks, allocator, + align_chunks); break; case DT_INT64: - return new CollectiveAdapterImpl<int64>(output, num_chunks, allocator); + return new CollectiveAdapterImpl<int64>(output, num_chunks, allocator, + align_chunks); break; default: LOG(FATAL) << "Unsupported type " << output->dtype() @@ -227,6 +238,7 @@ void BaseCollectiveExecutor::ExecuteAsync(OpKernelContext* ctx, Tensor* output = ctx->mutable_output(0); const Tensor* input = (col_params.instance.type == REDUCTION_COLLECTIVE || + col_params.instance.type == GATHER_COLLECTIVE || (col_params.instance.type == BROADCAST_COLLECTIVE && col_params.is_source)) ?
&ctx->input(0) diff --git a/tensorflow/core/common_runtime/base_collective_executor.h b/tensorflow/core/common_runtime/base_collective_executor.h index b711aa6d50..bc85b5af5f 100644 --- a/tensorflow/core/common_runtime/base_collective_executor.h +++ b/tensorflow/core/common_runtime/base_collective_executor.h @@ -78,9 +78,15 @@ class CollectiveAdapter { }; // Create a CollectiveAdaptor wrapping 'output', specialized to its -// data-type and shape. +// data-type and shape. If align_chunks == true then chunk size may +// be larger than output->NumElements() / num_chunks and one or more +// of the suffix chunks may be empty. Chunks will be arranged to start +// and end on alignment boundaries. If align_chunks == false then +// output->NumElements() % num_chunks must be 0 and all chunks will +// have exactly the same size, ignoring alignment issues. CollectiveAdapter* MakeCollectiveAdapter(Tensor* output, int num_chunks, - Allocator* allocator); + Allocator* allocator, + bool align_chunks = true); // Default implementation of CollectiveExecutor. 
Delegates the actual // work of moving data to a class specialized for the operation type, diff --git a/tensorflow/core/common_runtime/collective_param_resolver_local.cc b/tensorflow/core/common_runtime/collective_param_resolver_local.cc index 5acba6e093..a76708385b 100644 --- a/tensorflow/core/common_runtime/collective_param_resolver_local.cc +++ b/tensorflow/core/common_runtime/collective_param_resolver_local.cc @@ -618,6 +618,8 @@ void CollectiveParamResolverLocal::AssignCollectiveType(CollectiveParams* cp) { } else { cp->instance.impl_details.collective_name = "RingReduce"; } + } else if (cp->instance.type == GATHER_COLLECTIVE) { + cp->instance.impl_details.collective_name = "RingGather"; } else { cp->instance.impl_details.collective_name = "undef"; } diff --git a/tensorflow/core/common_runtime/ring_alg.cc b/tensorflow/core/common_runtime/ring_alg.cc new file mode 100644 index 0000000000..c20cc74bf7 --- /dev/null +++ b/tensorflow/core/common_runtime/ring_alg.cc @@ -0,0 +1,430 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/common_runtime/ring_alg.h" + +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/collective_rma_local.h" +#include "tensorflow/core/common_runtime/collective_util.h" +#include "tensorflow/core/common_runtime/copy_tensor.h" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/device_base.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/types.h" + +// Set true for greater intelligibility of debug mode log messages. +#define READABLE_KEYS false +// A ring algorithm exchanges chunks of tensor between devices. The chunk size +// depends on the number of subdivisions specified in the algorithm. If the +// user does not specify the number of subdivisions we may infer the number +// dynamically so that the resulting chunk size does not exceed +// kMaxChunkSizeBytes, empirically set at 4 MiB. +constexpr size_t kMaxChunkSizeBytes = (4 * 1024 * 1024); +// kMaxSubdivsPerDev is used to give an upper bound on the number of +// subdivisions dynamically generated. A reasonable value would be a small +// multiple of the number of NICs adjacent to each device. 
+constexpr int kMaxSubdivsPerDevice = 2; + +namespace tensorflow { +namespace { +// Each CollectiveOp implementation is free to define its own +// BufRendezvous key format. This function produces the key used by +// RingAlg instances. Note that the exec_key will differentiate between +// different instances consequently we don't need to further differentiate +// between subclasses of RingAlg. +string RingAlgBufKey(const string& name, const string& exec_key, int pass, + int section, int source_rank) { + if (READABLE_KEYS) { + return strings::StrCat(name, "(", exec_key, "):pass(", pass, "):section(", + section, "):srcrank(", source_rank, ")"); + } else { + // TODO(b/78352018): Try out some kind of denser encoding, e.g. 128 bit + // hash. + return strings::StrCat(exec_key, ":", pass, ":", section, ":", source_rank); + } +} + +} // namespace + +void RingAlg::PCQueue::Enqueue(RingField* rf) { + mutex_lock l(pcq_mu_); + deque_.push_back(rf); + if (waiter_count_ > 0) { + cv_.notify_one(); + } +} + +RingAlg::RingField* RingAlg::PCQueue::Dequeue() { + mutex_lock l(pcq_mu_); + if (deque_.empty()) { + ++waiter_count_; + while (deque_.empty()) { + cv_.wait(l); + } + --waiter_count_; + } + RingField* rf = deque_.front(); + deque_.pop_front(); + return rf; +} + +RingAlg::RingAlg(CollectiveType type, const string& name) + : type_(type), + name_(name), + col_ctx_(nullptr), + col_params_(nullptr), + done_(nullptr), + group_size_(-1), + num_subdivs_(-1) {} + +namespace { +Status GenerateSubdivsInCollectiveParams(CollectiveParams* col_params) { + if (col_params->instance.shape.num_elements() == 0) { + return errors::Internal("shape in CollectiveParams should be non-empty"); + } + const int kAvgDevPerTask = + col_params->group.group_size / col_params->group.num_tasks; + const int kMaxNumSubdivs = kMaxSubdivsPerDevice * kAvgDevPerTask; + if (kMaxNumSubdivs <= 0) { + return errors::Internal("Unexpected kMaxNumSubdivs ", kMaxNumSubdivs, + " in ", + 
col_params->instance.impl_details.collective_name); + } + // NOTE(ayushd): If no subdiv_offsets have been specified, dynamically add + // as many offsets as needed so that the size of tensor chunks <= + // kMaxChunkSizeBytes. Empirically, chunks that are too small or too large + // lead to worse performance. + int num_subdivs = 0; + const size_t tensor_size = col_params->instance.shape.num_elements() * + DataTypeSize(col_params->instance.data_type); + size_t chunk_size; + do { + ++num_subdivs; + int num_chunks = col_params->group.group_size * num_subdivs; + chunk_size = tensor_size / num_chunks; + VLOG(2) << "num_subdivs " << num_subdivs << " num_chunks " << num_chunks + << " chunk_size " << chunk_size; + } while (chunk_size > kMaxChunkSizeBytes && num_subdivs < kMaxNumSubdivs); + if (num_subdivs <= 0) { + return errors::Internal("Unexpected num_subdivs ", num_subdivs, " in ", + col_params->instance.impl_details.collective_name); + } + + int subdiv_stride = kAvgDevPerTask / num_subdivs; + if (subdiv_stride == 0) subdiv_stride = 1; + col_params->instance.impl_details.subdiv_offsets.reserve(num_subdivs); + for (int sdi = 0; sdi < num_subdivs; ++sdi) { + int subdiv_offset = subdiv_stride * sdi; + if (sdi % 2 == 1) subdiv_offset *= -1; + col_params->instance.impl_details.subdiv_offsets.push_back(subdiv_offset); + } + + if (VLOG_IS_ON(2)) { + string subdiv_buf; + for (const int subdiv_offset : + col_params->instance.impl_details.subdiv_offsets) { + strings::StrAppend(&subdiv_buf, " ", subdiv_offset); + } + VLOG(2) << "Dynamically generated " << num_subdivs + << " subdiv_offsets:" << subdiv_buf << " tensor_size " + << tensor_size << " chunk_size " << chunk_size; + } + + return Status::OK(); +} +} // namespace + +Status RingAlg::InitializeCollectiveParams(CollectiveParams* col_params) { + const string& device_name = + col_params->instance.device_names[col_params->default_rank]; + // Each subdiv permutation is a ring formed by rotating each + // single-task subsequence of 
devices by an offset. This makes most + // sense when each task has the same number of devices but we can't + // depend on that being the case so we'll compute something that + // works in any case. + + // Start by counting the devices in each task. + // Precondition: device_names must be sorted so that all devices in + // the same task are adjacent. + VLOG(2) << "Sorted task names: " + << str_util::Join(col_params->instance.task_names, ", "); + std::vector<int> dev_per_task; + const string* prior_task_name = &col_params->instance.task_names[0]; + int dev_count = 1; + for (int di = 1; di < col_params->group.group_size; ++di) { + if (col_params->instance.task_names[di] != *prior_task_name) { + dev_per_task.push_back(dev_count); + dev_count = 1; + prior_task_name = &col_params->instance.task_names[di]; + } else { + ++dev_count; + } + } + dev_per_task.push_back(dev_count); + DCHECK_EQ(col_params->group.num_tasks, dev_per_task.size()); + + if (col_params->instance.impl_details.subdiv_offsets.empty()) { + TF_RETURN_IF_ERROR(GenerateSubdivsInCollectiveParams(col_params)); + } + + // Generate a ring permutation for requested offset. + VLOG(2) << "Setting up perms for col_params " << col_params + << " subdiv_permutations " + << &col_params->instance.impl_details.subdiv_permutations; + col_params->instance.impl_details.subdiv_permutations.resize( + col_params->instance.impl_details.subdiv_offsets.size()); + col_params->subdiv_rank.resize( + col_params->instance.impl_details.subdiv_offsets.size(), -1); + for (int sdi = 0; + sdi < col_params->instance.impl_details.subdiv_offsets.size(); ++sdi) { + std::vector<int>& perm = + col_params->instance.impl_details.subdiv_permutations[sdi]; + DCHECK_EQ(perm.size(), 0); + int offset = col_params->instance.impl_details.subdiv_offsets[sdi]; + // A negative subdivision offset is interpreted as follows: + // 1. Reverse the local device ordering. + // 2. Begin the subdivision at abs(offset) in the reversed ordering.
+ bool reverse = false; + if (offset < 0) { + offset = abs(offset); + reverse = true; + } + int prior_dev_count = 0; // sum over prior worker device counts + for (int ti = 0; ti < col_params->group.num_tasks; ++ti) { + for (int di = 0; di < dev_per_task[ti]; ++di) { + int di_offset = (di + offset) % dev_per_task[ti]; + int offset_di = + reverse ? (dev_per_task[ti] - (di_offset + 1)) : di_offset; + // Device index in global subdivision permutation. + int permuted_di = prior_dev_count + offset_di; + int rank = static_cast(perm.size()); + perm.push_back(permuted_di); + if (col_params->instance.device_names[permuted_di] == device_name) { + DCHECK_EQ(permuted_di, col_params->default_rank); + col_params->subdiv_rank[sdi] = rank; + } + } + prior_dev_count += dev_per_task[ti]; + } + DCHECK_EQ(col_params->group.group_size, perm.size()); + } + + VLOG(2) << collective_util::SubdivPermDebugString(*col_params); + return Status::OK(); +} + +Status RingAlg::InitializeCollectiveContext(CollectiveContext* col_ctx) { + DCHECK(col_ctx->dev_mgr); + col_ctx_ = col_ctx; + col_params_ = &col_ctx->col_params; + return collective_util::InitializeDeviceAndLocality( + col_ctx->dev_mgr, col_ctx->device_name, &col_ctx->device, + &col_ctx->device_locality); +} + +string RingAlg::TensorDebugString(const Tensor& tensor) { + const DeviceBase::GpuDeviceInfo* gpu_device_info = + col_ctx_->op_ctx->device()->tensorflow_gpu_device_info(); + if (gpu_device_info) { + Tensor cpu_tensor(tensor.dtype(), tensor.shape()); + Notification note; + gpu_device_info->default_context->CopyDeviceTensorToCPU( + &tensor, "" /*tensor_name*/, col_ctx_->device, &cpu_tensor, + [¬e](const Status& s) { + DCHECK(s.ok()); + note.Notify(); + }); + note.WaitForNotification(); + return cpu_tensor.SummarizeValue(64); + } else { + return tensor.SummarizeValue(64); + } +} + +void RingAlg::StartAbort(const Status& s) { + // In abort mode we stop issuing additional ProvideBuf + // and ConsumeBuf calls, but we need to wait for all of 
the + // outstanding callbacks to be invoked before quitting. + bool abort_started = false; + { + mutex_lock l(status_mu_); + if (status_.ok()) { + LOG(ERROR) << "Aborting Ring" << name_ << " with " << s; + abort_started = true; + status_.Update(s); + } + } + // If this is the initial entry to abort mode then invoke StartAbort + // on the CollectiveExecutor that invoked us. That should start + // cancellation on all of the outstanding CollectiveRemoteAccess + // actions. + if (abort_started) { + col_ctx_->col_exec->StartAbort(s); + } +} + +void RingAlg::Finish(bool ok) { + if (ok) { + // Recover the output from the adaptor. + ca_->ConsumeFinalValue(col_ctx_->output); + } + Status s; + { + mutex_lock l(status_mu_); + s = status_; + } + rfv_.clear(); // Give up Refs on output tensor. + done_(s); +} + +// At the beginning of the algorithm initialize a RingField struct for +// every independent field of the tensor. +void RingAlg::InitRingField(RingField* rf, int chunk_idx, int subdiv_idx, + int field_idx) { + // Note on field indexing: There are group_size_ devices in the + // instance, implying the same number of chunks per tensor, where a + // chunk is the unit of data transferred in a time step. However, if + // a device can simultaneously send data by 2 or more independent + // channels we can speed up the transfer by subdividing chunks and + // processing multiple subdivisions at once. So the actual number + // of RingFields is group_size_ * num_subdivs_. + DCHECK_EQ(field_idx, (chunk_idx * num_subdivs_) + subdiv_idx); + rf->chunk_idx = chunk_idx; + rf->subdiv_idx = subdiv_idx; + rf->sc_idx = field_idx; + rf->rank = col_params_->subdiv_rank[subdiv_idx]; + rf->second_pass = false; + rf->action = RF_INIT; + // Recv from the device with preceding rank within the subdivision. 
+ int recv_from_rank = (rf->rank + (group_size_ - 1)) % group_size_; + int send_to_rank = (rf->rank + 1) % group_size_; + rf->recv_dev_idx = col_params_->instance.impl_details + .subdiv_permutations[subdiv_idx][recv_from_rank]; + int send_dev_idx = col_params_->instance.impl_details + .subdiv_permutations[subdiv_idx][send_to_rank]; + rf->recv_is_remote = !col_params_->task.is_local[rf->recv_dev_idx]; + rf->send_is_remote = !col_params_->task.is_local[send_dev_idx]; + if (ca_->ChunkBytes(rf->sc_idx) > 0) { + // In pass 0 we skip Recv when rank = chunk_idx + rf->do_recv = (rf->chunk_idx != rf->rank); + // In pass 0 we skip Send when rank = chunk_idx-1 + rf->do_send = + (rf->rank != ((rf->chunk_idx + (group_size_ - 1)) % group_size_)); + } + rf->is_final = + (rf->rank == ((rf->chunk_idx + (group_size_ - 1)) % group_size_)); + if (rf->do_send || rf->do_recv) { + rf->chunk = ca_->ChunkAlias(rf->sc_idx); + } + VLOG(2) << this << " InitRingField " << rf->DebugString() << " chunk " + << ca_->TBounds(rf->chunk); +} + +// When a RingField transitions from first to second recompute the +// do_send and do_recv values. +void RingAlg::AdvanceToSecondPass(RingField* rf) { + VLOG(3) << "IncrRingField old value " << rf->DebugString(); + DCHECK(!rf->second_pass); + rf->second_pass = true; + rf->action = RF_INIT; + if (ca_->ChunkBytes(rf->sc_idx) > 0) { + // In pass 1 the send/no-send boundary moves down 1 place. 
+ rf->do_recv = + (rf->rank != ((rf->chunk_idx + (group_size_ - 1)) % group_size_)); + rf->do_send = + (rf->rank != ((rf->chunk_idx + (group_size_ - 2)) % group_size_)); + } + rf->is_final = + (rf->rank == ((rf->chunk_idx + (group_size_ - 2)) % group_size_)); + VLOG(3) << "IncrRingField new value " << rf->DebugString(); +} + +string RingAlg::RingField::DebugString() const { + string rv = strings::StrCat("RingField rank=", rank, " chunk_idx=", chunk_idx, + " subdiv=", subdiv_idx, " sc_idx=", sc_idx, + " action=", action); + strings::StrAppend(&rv, " pass=", second_pass); + strings::StrAppend(&rv, " do_send=", do_send, " do_recv=", do_recv, + " is_final=", is_final, " recv_is_remote=", recv_is_remote, + " recv_dev_idx=", recv_dev_idx, " sc_idx=", sc_idx); + return rv; +} + +void RingAlg::DispatchSend(RingField* rf, const StatusCallback& done) { + DCHECK(rf->do_send); + string send_buf_key = RingAlgBufKey(name_, col_ctx_->exec_key, + rf->second_pass, rf->sc_idx, rf->rank); + VLOG(3) << "DispatchSend rank=" << col_params_->default_rank << " send key " + << send_buf_key << " chunk " << ca_->TBounds(rf->chunk) << " sc_idx " + << rf->sc_idx; + int send_to_rank = (rf->rank + 1) % group_size_; + int send_to_dev_idx = col_params_->instance.impl_details + .subdiv_permutations[rf->subdiv_idx][send_to_rank]; + col_ctx_->col_exec->PostToPeer( + col_params_->instance.device_names[send_to_dev_idx], + col_params_->instance.task_names[send_to_dev_idx], send_buf_key, + col_ctx_->device, col_ctx_->op_ctx->op_device_context(), + col_ctx_->op_ctx->output_alloc_attr(0), &rf->chunk, + col_ctx_->device_locality, done); +} + +void RingAlg::DispatchRecv(RingField* rf, const StatusCallback& done) { + DCHECK(rf->do_recv); + string recv_buf_key = + RingAlgBufKey(name_, col_ctx_->exec_key, rf->second_pass, rf->sc_idx, + (rf->rank + (group_size_ - 1)) % group_size_); + VLOG(3) << "DispatchRecv rank=" << col_params_->default_rank << " recv key " + << recv_buf_key << " chunk " << 
ca_->TBounds(rf->chunk) << " into " + << ((col_params_->merge_op != nullptr) ? "tmp_chunk" : "chunk"); + Tensor* dst_tensor = (!rf->second_pass && (col_params_->merge_op != nullptr)) + ? &rf->tmp_chunk + : &rf->chunk; + col_ctx_->col_exec->RecvFromPeer( + col_params_->instance.device_names[rf->recv_dev_idx], + col_params_->instance.task_names[rf->recv_dev_idx], + col_params_->task.is_local[rf->recv_dev_idx], recv_buf_key, + col_ctx_->device, col_ctx_->op_ctx->op_device_context(), + col_ctx_->op_ctx->output_alloc_attr(0), dst_tensor, + col_ctx_->device_locality, rf->subdiv_idx, done); +} + +string RingAlg::FieldState() { + string s = strings::StrCat( + "Ring", name_, " ", strings::Hex(reinterpret_cast(this)), + " exec ", col_ctx_->exec_key, " step_id=", col_ctx_->step_id, + " state of all ", rfv_.size(), " fields:"); + for (int i = 0; i < rfv_.size(); ++i) { + s.append("\n"); + s.append(rfv_[i].DebugString()); + } + return s; +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/ring_alg.h b/tensorflow/core/common_runtime/ring_alg.h new file mode 100644 index 0000000000..dc07618f88 --- /dev/null +++ b/tensorflow/core/common_runtime/ring_alg.h @@ -0,0 +1,124 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_RING_ALG_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_RING_ALG_H_ + +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/base_collective_executor.h" +#include "tensorflow/core/framework/collective.h" + +namespace tensorflow { +class Device; + +// Basic ring-algorithm implementation to be further specialized +// for specific collective functions. +class RingAlg : public CollectiveImplementationInterface { + public: + explicit RingAlg(CollectiveType type, const string& name); + ~RingAlg() override {} + + // Establishes the requested number of subdivision permutations based on the + // ring order implicit in the device order. + Status InitializeCollectiveParams(CollectiveParams* col_params) override; + + // Initializes members of CollectiveContext not yet initialized, i.e. device + // and device_locality. Also saves the CollectiveContext in this object. + Status InitializeCollectiveContext(CollectiveContext* col_ctx) override; + + // No-op for ring alg. + Status InitializeInstanceBeforeGroupDiscovery(CollectiveParams*) override { + return Status::OK(); + } + + protected: + // Called when a bad status is received that implies we should terminate + // execution and return a bad status. + void StartAbort(const Status& s); + void Finish(bool ok); + + // Current status of a RingField + enum RingFieldAction { + RF_INIT = 0, // Just initialized for a pass + RF_RECV, // Recv pending + RF_REDUCE, // Reduce pending + RF_FINALIZE, // FinalOp pending + RF_SEND_READY, // Ready to send + RF_SEND, // Send pending + RF_DONE, // No more work + }; + + // Tracks progress of actions on a single subfield of the entire tensor. 
+ struct RingField { + int16 chunk_idx; // major division index + int16 subdiv_idx; // minor division index + int16 sc_idx; // subchunk index + int16 rank; // rank within subdiv permutation + int16 recv_dev_idx; // dev from which value should be recv'd + RingFieldAction action; + bool second_pass; + bool recv_is_remote = false; + bool send_is_remote = false; + bool do_send = false; // is the value sent in this pass? + bool do_recv = false; // is the value recv'd in this pass? + bool is_final = false; // is the last field in the pass for this rank + Tensor chunk; // alias to field values + Tensor tmp_chunk; + Status status; + string DebugString() const; + }; + virtual void InitRingField(RingField* rf, int chunk_idx, int subdiv_idx, + int field_idx); + void AdvanceToSecondPass(RingField* rf); + void DispatchSend(RingField* rf, const StatusCallback& done); + void DispatchRecv(RingField* rf, const StatusCallback& done); + + // For constructing log messages for debugging. + string FieldState(); + string TensorDebugString(const Tensor& tensor); + + // Producer/Consumer Queue of RingField structs. 
+ class PCQueue { + public: + void Enqueue(RingField* rf); + RingField* Dequeue(); + + private: + mutex pcq_mu_; + condition_variable cv_; + int waiter_count_ GUARDED_BY(pcq_mu_) = 0; + std::deque deque_ GUARDED_BY(pcq_mu_); + }; + + const CollectiveType type_; + const string name_; + CollectiveContext* col_ctx_; // Not owned + const CollectiveParams* col_params_; // Not owned + StatusCallback done_; + int group_size_; + int num_subdivs_; + Tensor group_size_tensor_; + Notification group_size_tensor_ready_; + std::unique_ptr ca_; + mutex status_mu_; + Status status_ GUARDED_BY(status_mu_); + std::vector rfv_; +}; + +} // namespace tensorflow +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_RING_ALG_H_ diff --git a/tensorflow/core/common_runtime/ring_gatherer.cc b/tensorflow/core/common_runtime/ring_gatherer.cc new file mode 100644 index 0000000000..58251fc171 --- /dev/null +++ b/tensorflow/core/common_runtime/ring_gatherer.cc @@ -0,0 +1,266 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/common_runtime/ring_gatherer.h" + +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/collective_rma_local.h" +#include "tensorflow/core/common_runtime/collective_util.h" +#include "tensorflow/core/common_runtime/copy_tensor.h" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/device_base.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +Status RingGatherer::InitializeCollectiveParams(CollectiveParams* col_params) { + DCHECK_EQ(col_params->instance.type, GATHER_COLLECTIVE); + DCHECK_EQ(col_params->instance.impl_details.collective_name, "RingGather"); + // TODO(tucker): Maybe add subdiv support. It's only useful with + // multiple NICS, and maybe gather performance isn't important enough. + // For now, there must always be only a single subdiv at offset 0. 
+ if (!col_params->instance.impl_details.subdiv_offsets.empty() && + (col_params->instance.impl_details.subdiv_offsets.size() > 1 || + col_params->instance.impl_details.subdiv_offsets[0] != 0)) { + return errors::InvalidArgument( + "RingGather cannot take any subdiv offset other than 0."); + } + if (col_params->instance.impl_details.subdiv_offsets.empty()) { + col_params->instance.impl_details.subdiv_offsets.push_back(0); + } + return RingAlg::InitializeCollectiveParams(col_params); +} + +void RingGatherer::Run(StatusCallback done) { + DCHECK(col_ctx_); + DCHECK(col_params_); + done_ = std::move(done); + group_size_ = col_params_->group.group_size; + num_subdivs_ = static_cast( + col_params_->instance.impl_details.subdiv_permutations.size()); + DCHECK_GT(num_subdivs_, 0); + + if (VLOG_IS_ON(1)) { + string buf; + for (int r = 0; r < col_params_->instance.device_names.size(); ++r) { + strings::StrAppend(&buf, "dev ", r, " : ", + col_params_->instance.device_names[r], "\n"); + } + for (int sd = 0; + sd < col_params_->instance.impl_details.subdiv_permutations.size(); + ++sd) { + strings::StrAppend(&buf, "\nsubdiv ", sd, " perm: "); + for (auto x : + col_params_->instance.impl_details.subdiv_permutations[sd]) { + strings::StrAppend(&buf, x, ", "); + } + } + VLOG(1) << "RingGatherer::Run for device " << col_ctx_->device_name + << " default_rank " << col_params_->default_rank << "\n" + << buf; + } + + // Prepare to alias fields within the output. + AllocatorAttributes attr = col_ctx_->op_ctx->output_alloc_attr(0); + ca_.reset(MakeCollectiveAdapter(col_ctx_->output, group_size_ * num_subdivs_, + col_ctx_->device->GetAllocator(attr), + false /*align_chunks*/)); + + // Start by copying input to the rank-specific offset of output. + // We are running in a blockable thread and the callback can't block so + // just wait here on the copy. 
+ Notification note; + Status status; + Tensor alias_chunk(ca_->ChunkAlias(col_params_->subdiv_rank[0])); + CollectiveRemoteAccessLocal::MemCpyAsync( + col_ctx_->op_ctx->input_device_context(0), + col_ctx_->op_ctx->op_device_context(), col_ctx_->device, col_ctx_->device, + col_ctx_->op_ctx->input_alloc_attr(0), + col_ctx_->op_ctx->output_alloc_attr(0), col_ctx_->input, &alias_chunk, + 0 /*dev_to_dev_stream_index*/, [¬e, &status](const Status& s) { + status.Update(s); + note.Notify(); + }); + note.WaitForNotification(); + if (!status.ok()) { + done_(status); + return; + } + Finish(RunAsyncParts()); +} + +bool RingGatherer::RunAsyncParts() { + // This function orchestrates RingGatherer actions on behalf of a + // single device. It is entered by a blockable thread that + // loops within it until all actions assigned to that device + // complete. Hence function local variables are accessible only by that + // one thread and do not require an explicit mutex. + rfv_.clear(); + rfv_.resize(group_size_ * num_subdivs_); + PCQueue ready_queue; + for (int chunk_idx = 0; chunk_idx < group_size_; ++chunk_idx) { + for (int subdiv_idx = 0; subdiv_idx < num_subdivs_; ++subdiv_idx) { + int rf_index = (chunk_idx * num_subdivs_) + subdiv_idx; + InitRingField(&rfv_[rf_index], chunk_idx, subdiv_idx, rf_index); + ready_queue.Enqueue(&rfv_[rf_index]); + } + } + const DeviceBase::GpuDeviceInfo* gpu_info = + col_ctx_->device->tensorflow_gpu_device_info(); + if (gpu_info) { + // Wait for all currently queued events on the CPU compute stream to + // complete before proceeding. The previous InitRingField calls allocated + // temp memory buffers that are not guaranteed to be valid (e.g. for RDMA + // write) unless we do. 
+ Notification note; + Status s = gpu_info->default_context->ThenExecute( + col_ctx_->device, gpu_info->stream, [¬e]() { note.Notify(); }); + if (s.ok()) { + note.WaitForNotification(); + } else { + mutex_lock l(status_mu_); + status_ = + errors::Internal("Failed to dispatch ThenExecute in RingGatherer"); + return false; + } + } + + int field_done_count = 0; + int send_pending_count = 0; + int recv_pending_count = 0; + std::atomic aborted(false); + + // Loop until all RingFields have advanced to completion. + while (field_done_count < rfv_.size()) { + VLOG(4) << FieldState(); + // Wait for a RingField to appear in the ready_queue. + RingField* rf = ready_queue.Dequeue(); + // Advance the RingField to its next action and execute, repeating + // until either an async action has been started or the RingField + // is done. + bool dispatched = false; // true if async action was initiated + do { + if (aborted) { + // Requeue this RingField to be counted off below. + ready_queue.Enqueue(rf); + break; + } + switch (rf->action) { + case RF_INIT: + if (rf->do_recv) { + rf->action = RF_RECV; + auto requeue = [this, rf, &ready_queue, &aborted](Status s) { + if (!s.ok()) { + aborted = true; + StartAbort(s); + } + ready_queue.Enqueue(rf); + }; + DispatchRecv(rf, requeue); + dispatched = true; + ++recv_pending_count; + } else { + rf->action = RF_SEND_READY; + } + break; + case RF_RECV: + DCHECK_GT(recv_pending_count, 0); + --recv_pending_count; + rf->action = RF_SEND_READY; + break; + case RF_REDUCE: + // Never used for Gather, so just fall through. + TF_FALLTHROUGH_INTENDED; + case RF_FINALIZE: + // Never used for Gather, so just fall through. 
+ TF_FALLTHROUGH_INTENDED; + case RF_SEND_READY: + if (rf->do_send) { + rf->action = RF_SEND; + auto send_complete = [this, rf, &ready_queue, &aborted](Status s) { + if (!s.ok()) { + aborted = true; + StartAbort(s); + } + ready_queue.Enqueue(rf); + }; + DispatchSend(rf, send_complete); + dispatched = true; + ++send_pending_count; + } else { + rf->action = RF_DONE; + } + break; + case RF_SEND: + DCHECK_GT(send_pending_count, 0); + --send_pending_count; + rf->action = RF_DONE; + break; + case RF_DONE: + break; + } + if (rf->action == RF_DONE) { + // There's only one pass. + ++field_done_count; + break; // from do while(!dispatched) + } + } while (!dispatched); + if (aborted) break; + } // while (field_done_count < number of fields) + + if (aborted) { + // All of the pending data actions should be aborted; field the + // callbacks and clear the queue before quitting. + while ((send_pending_count > 0) || (recv_pending_count > 0)) { + RingField* rf = ready_queue.Dequeue(); + switch (rf->action) { + case RF_RECV: + --recv_pending_count; + break; + case RF_SEND: + --send_pending_count; + break; + default: { + } // Ignore any other actions + } + } + } + + DCHECK_EQ(send_pending_count, 0); + DCHECK_EQ(recv_pending_count, 0); + + VLOG(2) << this << " device=" << col_ctx_->device_name << " finish;" + << " final value " << TensorDebugString(ca_->Value()); + return !aborted; +} + +REGISTER_COLLECTIVE(RingGather, RingGatherer); + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/ring_gatherer.h b/tensorflow/core/common_runtime/ring_gatherer.h new file mode 100644 index 0000000000..ee9634834d --- /dev/null +++ b/tensorflow/core/common_runtime/ring_gatherer.h @@ -0,0 +1,51 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_RING_GATHERER_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_RING_GATHERER_H_ + +#include +#include +#include +#include + +#include "tensorflow/core/common_runtime/base_collective_executor.h" +#include "tensorflow/core/common_runtime/ring_alg.h" +#include "tensorflow/core/framework/collective.h" + +namespace tensorflow { +class Device; + +// Ring-algorithm implementation of collective all-gather. +class RingGatherer : public RingAlg { + public: + RingGatherer() : RingAlg(GATHER_COLLECTIVE, "Gather") {} + ~RingGatherer() override {} + + Status InitializeCollectiveParams(CollectiveParams* col_params) override; + + // Begins async execution of the ring gather algorithm. + // Must be called in a blockable thread. + // TODO(b/80529858): remove the previous warning when we have a dedicated + // collective threadpool. + void Run(StatusCallback done) override; + + private: + bool RunAsyncParts(); + + friend class RingGathererTest; +}; + +} // namespace tensorflow +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_RING_GATHERER_H_ diff --git a/tensorflow/core/common_runtime/ring_gatherer_test.cc b/tensorflow/core/common_runtime/ring_gatherer_test.cc new file mode 100644 index 0000000000..97ff7b58fa --- /dev/null +++ b/tensorflow/core/common_runtime/ring_gatherer_test.cc @@ -0,0 +1,651 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/common_runtime/ring_gatherer.h" + +#include +#include "absl/memory/memory.h" +#include "tensorflow/core/common_runtime/base_collective_executor.h" +#include "tensorflow/core/common_runtime/collective_rma_local.h" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/device_resolver_local.h" +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/common_runtime/test_collective_executor_mgr.h" +#include "tensorflow/core/common_runtime/threadpool_device.h" +#include "tensorflow/core/framework/collective.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" + +namespace tensorflow { + +// Wraps CollectiveRemoteAccessLocal with the ability to return an +// error status to the N'th action. 
+class FailTestRMA : public CollectiveRemoteAccessLocal { + public: + FailTestRMA(const DeviceMgr* dev_mgr, DeviceResolverInterface* dev_resolver, + int64 step_id, int fail_after) + : CollectiveRemoteAccessLocal(dev_mgr, dev_resolver, step_id), + fail_after_(fail_after) {} + + bool MaybeFail(const StatusCallback& done) { + bool fail_now = false; + { + mutex_lock l(mu_); + if (fail_after_ > 0) { + fail_now = (--fail_after_ == 0); + } + } + if (fail_now) { + done(errors::Internal("Deliberate failure")); + return true; + } + return false; + } + + void RecvFromPeer(const string& peer_device, const string& peer_task, + bool peer_is_local, const string& key, Device* to_device, + DeviceContext* to_device_ctx, + const AllocatorAttributes& to_alloc_attr, Tensor* to_tensor, + const DeviceLocality& client_locality, + int dev_to_dev_stream_index, + const StatusCallback& done) override { + if (MaybeFail(done)) return; + CollectiveRemoteAccessLocal::RecvFromPeer( + peer_device, peer_task, peer_is_local, key, to_device, to_device_ctx, + to_alloc_attr, to_tensor, client_locality, dev_to_dev_stream_index, + done); + } + + void PostToPeer(const string& peer_device, const string& peer_task, + const string& key, Device* from_device, + DeviceContext* from_device_ctx, + const AllocatorAttributes& from_alloc_attr, + const Tensor* from_tensor, + const DeviceLocality& client_locality, + const StatusCallback& done) override { + if (MaybeFail(done)) return; + CollectiveRemoteAccessLocal::PostToPeer( + peer_device, peer_task, key, from_device, from_device_ctx, + from_alloc_attr, from_tensor, client_locality, done); + } + + mutex mu_; + int fail_after_ GUARDED_BY(mu_); +}; + +std::unique_ptr GetKernel(const NodeDef& node, + const DeviceType& device_type, + DeviceBase* device) { + Status status; + std::unique_ptr k = CreateOpKernel( + device_type, device, device->GetAllocator(AllocatorAttributes()), node, + TF_GRAPH_DEF_VERSION, &status); + if (!status.ok()) { + LOG(FATAL) << status; + } + 
return k; +} + +static int64 kStepId = 123; + +class RingGathererTest : public ::testing::Test { + protected: + RingGathererTest() : device_type_(DEVICE_CPU) {} + +#ifdef GOOGLE_CUDA + void InitGPUDevices() { + auto device_factory = DeviceFactory::GetFactory("GPU"); + CHECK(device_factory); + SessionOptions options; + Status s = device_factory->CreateDevices( + options, "/job:worker/replica:0/task:0", &gpu_devices_); + CHECK(s.ok()); + } +#endif + + ~RingGathererTest() override { + stop_ = true; + for (auto i : instances_) delete i; + if (col_exec_) col_exec_->Unref(); + } + + void Init(int num_workers, int num_devices, DataType dtype, + const DeviceType& device_type, int num_subdivs, int fail_after) { +#ifdef GOOGLE_CUDA + InitGPUDevices(); +#endif + device_type_ = device_type; + std::vector> local_devices; + SessionOptions sess_opts; + sess_opts.env = Env::Default(); + Bytes mem_limit(4 << 20); + DeviceLocality dev_locality; + for (int wi = 0; wi < num_workers; ++wi) { + for (int di = 0; di < num_devices; ++di) { + if (device_type == DEVICE_CPU) { + string dev_name = + strings::StrCat("/job:worker/replica:0/task:", wi, "/cpu:", di); + local_devices.push_back(absl::make_unique( + sess_opts, dev_name, mem_limit, dev_locality, cpu_allocator())); + } else if (device_type == DEVICE_GPU && !gpu_devices_.empty()) { + int dev_idx = (wi * num_devices) + di; + if (dev_idx >= static_cast(gpu_devices_.size())) { + LOG(INFO) << "dev_mgr has access to limited GPUs, reusing for more " + "than one ring node."; + } else { + local_devices.push_back(std::move(gpu_devices_[dev_idx])); + } + } else { + LOG(FATAL) << "Unsupported device_type " << device_type; + } + } + } + if (!dev_mgr_ || device_type == DEVICE_CPU) { + LOG(ERROR) << "resetting dev_mgr for " << local_devices.size() + << " devices: "; + dev_mgr_.reset(new DeviceMgr(std::move(local_devices))); + } + if (!gpu_ring_order_) gpu_ring_order_.reset(new string()); + dev_resolver_.reset(new DeviceResolverLocal(dev_mgr_.get())); 
+ rma_ = new FailTestRMA(dev_mgr_.get(), dev_resolver_.get(), kStepId, + fail_after); + col_exec_ = new BaseCollectiveExecutor( + &col_exec_mgr_, rma_, kStepId, dev_mgr_.get(), gpu_ring_order_.get()); + col_params_.name = "test_collective"; + static const int kGroupKey = 5; + col_params_.group.group_key = kGroupKey; + col_params_.group.device_type = device_type; + col_params_.group.group_size = num_workers * num_devices; + static const int kInstanceKey = 17; + col_params_.instance.instance_key = kInstanceKey; + col_params_.instance.impl_details.subdiv_offsets.clear(); + col_params_.instance.type = GATHER_COLLECTIVE; + col_params_.instance.impl_details.collective_name = "RingGather"; + col_params_.instance.data_type = dtype; + col_params_.instance.impl_details.subdiv_permutations.resize(num_subdivs); + col_params_.subdiv_rank.resize(num_subdivs); + int subdiv_stride = num_devices / num_subdivs; + for (int sdi = 0; sdi < num_subdivs; ++sdi) { + col_params_.instance.impl_details.subdiv_offsets.push_back(sdi * + subdiv_stride); + col_params_.subdiv_rank[sdi] = sdi * subdiv_stride; + } + + // Set up a local device ring order that's not just 0,1,2... + std::vector local_ring_order; + for (int di = 0; di < num_devices; ++di) { + local_ring_order.push_back(di); + } + for (int di = 0; di < num_devices; ++di) { + bool is_odd = ((di % 2) == 1); + int other = (di + (is_odd ? 7 : 3)) % num_devices; + if (di == other) continue; + iter_swap(local_ring_order.begin() + di, + local_ring_order.begin() + other); + } + string lro_buf; + for (auto d : local_ring_order) strings::StrAppend(&lro_buf, d, ", "); + VLOG(1) << "local_ring_order " << lro_buf; + + // Set up all of the fake device contexts. 
+ for (int wi = 0; wi < num_workers; ++wi) { + for (int di = 0; di < num_devices; ++di) { + string task_name = strings::StrCat("/job:worker/replica:0/task:", wi); + string dev_name = strings::StrCat(task_name, "/cpu:", di); + if (device_type == DEVICE_GPU) { + dev_name = + strings::StrCat(task_name, "/gpu:", di % gpu_devices_.size()); + } + col_params_.instance.device_names.push_back(dev_name); + col_params_.instance.task_names.push_back(task_name); + // Normally each device would set is_local to its own perspective but + // this test runs in a single process so is_local is always true. + col_params_.task.is_local.push_back(true); + for (int sdi = 0; sdi < num_subdivs; ++sdi) { + int rotated_di = + (di + col_params_.instance.impl_details.subdiv_offsets[sdi]) % + num_devices; + col_params_.instance.impl_details.subdiv_permutations[sdi].push_back( + wi * num_devices + local_ring_order[rotated_di]); + } + } + } + for (int wi = 0; wi < num_workers; ++wi) { + for (int di = 0; di < num_devices; ++di) { + int rank = wi * num_devices + di; + instances_.push_back(new DeviceInstance( + rank, col_params_.instance.device_names[rank], device_type_, this)); + } + } + } + + void Gather(int fail_after) { + std::atomic done(0); + for (auto di : instances_) { + SchedClosure([di, &done] { + di->DoGather(); + ++done; + }); + if (fail_after > 0) { + // Stagger the op execution starts. 
+ Env::Default()->SleepForMicroseconds(100); + } + } + while (done < static_cast(instances_.size())) { + if (stop_) break; + Env::Default()->SleepForMicroseconds(1000); + } + } + + template + void RunTest(DataType dtype, const DeviceType& device_type, int num_workers, + int num_devices, int num_subdivs, int tensor_len, + int fail_after) { + Init(num_workers, num_devices, dtype, device_type, num_subdivs, fail_after); + int32 output_len = tensor_len * num_workers * num_devices; + std::vector expected(output_len, 0.0); + for (int di = 0; di < static_cast(instances_.size()); ++di) { + DeviceInstance* instance = instances_[di]; + int32 instance_offset = di * tensor_len; + instance->InitTensor(dtype, TensorShape({tensor_len}), + [instance_offset, &expected, dtype, di](Tensor* t) { + for (size_t i = 0; i < t->NumElements(); ++i) { + // The cast is necessary to prevent clang-tidy + // from insisting that a faster non-open source + // function be substituted. + float value = + pow(10, static_cast(di)) * i; + if (dtype == DT_INT32 || dtype == DT_INT64) { + value = di * 10 + i; + } + t->flat()(i) = static_cast(value); + expected[instance_offset + i] = value; + } + }); + } + Gather(fail_after); + if (fail_after > 0) { + // Confirm that every device terminated with the expected error status. + for (int di = 0; di < static_cast(instances_.size()); ++di) { + EXPECT_EQ("Deliberate failure", + instances_[di]->status_.error_message()); + } + } else { + // Confirm that every device accumulated the same set of correct + // values. 
+ for (int di = 0; di < static_cast(instances_.size()); ++di) { + TF_EXPECT_OK(instances_[di]->status_); + Tensor* inst = &instances_[di]->output_tensor_; + CHECK(inst); + Tensor actual(dtype, TensorShape({output_len})); + if (device_type_ == DEVICE_CPU) { + CHECK(actual.CopyFrom(*inst, inst->shape())); + VLOG(1) << "actual " << actual.SummarizeValue(100); + } else if (device_type_ == DEVICE_GPU) { + Notification note; + Device* dev = instances_[di]->device_; + auto* dev_info = dev->tensorflow_gpu_device_info(); + CHECK(dev_info); + dev_info->default_context->CopyDeviceTensorToCPU( + inst, "" /*tensor_name*/, dev, &actual, [&note](const Status& s) { + CHECK(s.ok()); + note.Notify(); + }); + note.WaitForNotification(); + } + + auto alias = actual.template unaligned_flat(); + for (int i = 0; i < output_len; ++i) { + switch (dtype) { + case DT_FLOAT: + EXPECT_FLOAT_EQ(expected[i], alias(i)) + << "Mismatch at device " << di << " index " << i; + break; + case DT_DOUBLE: + EXPECT_DOUBLE_EQ(expected[i], alias(i)) + << "Mismatch at device " << di << " index " << i; + break; + case DT_INT32: + case DT_INT64: + EXPECT_EQ(expected[i], alias(i)) + << "Mismatch at device " << di << " index " << i; + break; + default: + LOG(FATAL) << "unimplemented"; + } + } + } + } + } + + std::unique_ptr GetCollectiveGather(const CollectiveParams& params, + Tensor* input, + const DeviceType& device_type, + DeviceBase* device) { + mutex_lock l(mu_); + NodeDef node_def; + NodeDefBuilder builder( + strings::StrCat("collective_gather_", gather_counter_++), + "CollectiveGather"); + TF_CHECK_OK(builder.Attr("T", params.instance.data_type) + .Attr("group_size", params.group.group_size) + .Attr("group_key", params.group.group_key) + .Attr("instance_key", params.instance.instance_key) + .Attr("shape", params.instance.shape) + .Input(FakeInput(params.instance.data_type)) + .Finalize(&node_def)); + return GetKernel(node_def, device_type, device); + } + + void RunSubdivPermsTest( + CollectiveParams* cp, + 
const std::vector>& expected_subdiv_perms, + const std::vector& expected_subdiv_rank) { + col_exec_ = nullptr; + cp->instance.impl_details.subdiv_permutations.clear(); + cp->subdiv_rank.clear(); + // Create a stub ring gatherer only for testing param initialization. + RingGatherer gatherer; + TF_CHECK_OK(gatherer.InitializeCollectiveParams(cp)); + EXPECT_EQ(expected_subdiv_perms, + cp->instance.impl_details.subdiv_permutations); + EXPECT_EQ(expected_subdiv_rank, cp->subdiv_rank); + } + + class DeviceInstance { + public: + DeviceInstance(int rank, const string& dev_name, + const DeviceType& device_type, RingGathererTest* parent) + : parent_(parent), + dev_name_(dev_name), + device_type_(device_type), + rank_(rank) { + TF_CHECK_OK(parent_->dev_mgr_->LookupDevice(dev_name, &device_)) + << "Couldn't find device " << dev_name + << " existing devices: " << parent_->dev_mgr_->DebugString(); + col_params_.name = parent_->col_params_.name; + col_params_.group.group_key = parent_->col_params_.group.group_key; + col_params_.group.device_type = parent_->col_params_.group.device_type; + col_params_.group.group_size = parent_->col_params_.group.group_size; + col_params_.instance = parent->col_params_.instance; + col_params_.task.is_local = parent_->col_params_.task.is_local; + col_params_.subdiv_rank = parent_->col_params_.subdiv_rank; + + int num_subdivs = static_cast(col_params_.subdiv_rank.size()); + int group_size = col_params_.group.group_size; + CHECK_EQ(group_size, + static_cast(col_params_.instance.device_names.size())); + // Id of this device is at rank position in first subdiv perm. + int my_device_id = + col_params_.instance.impl_details.subdiv_permutations[0][rank]; + col_params_.default_rank = my_device_id; + // Set rank for all other subdivs by finding that device_id. 
+ for (int sdi = 0; sdi < num_subdivs; ++sdi) { + for (int r = 0; r < static_cast(col_params_.instance.impl_details + .subdiv_permutations[sdi] + .size()); + ++r) { + if (my_device_id == + col_params_.instance.impl_details.subdiv_permutations[sdi][r]) { + col_params_.subdiv_rank[sdi] = r; + break; + } + } + } + } + + void InitTensor(DataType dtype, const TensorShape& shape, + const std::function& init_f) { + input_tensor_ = + Tensor(device_->GetAllocator(AllocatorAttributes()), dtype, shape); + if (device_type_ == DEVICE_CPU) { + init_f(&input_tensor_); + } else if (device_type_ == DEVICE_GPU) { + Tensor cpu_tensor(dtype, shape); + init_f(&cpu_tensor); + auto* dev_info = device_->tensorflow_gpu_device_info(); + CHECK(dev_info); + Notification note; + dev_info->default_context->CopyCPUTensorToDevice( + &cpu_tensor, device_, &input_tensor_, [&note](const Status& s) { + CHECK(s.ok()); + note.Notify(); + }); + note.WaitForNotification(); + } else { + LOG(FATAL) << "Unsupported device_type " << device_type_; + } + } + + void DoGather() { + // Prepare an OpKernelContext. 
+ OpKernelContext::Params op_params; + op_params.step_id = kStepId; + op_params.device = device_; + gtl::InlinedVector inputs; + inputs.push_back(TensorValue(&input_tensor_)); + op_params.inputs = &inputs; + gtl::InlinedVector input_aa( + {AllocatorAttributes()}); + op_params.input_alloc_attrs = &input_aa; + gtl::InlinedVector input_dc; + DeviceContext* dev_ctx = nullptr; + auto* dev_info = device_->tensorflow_gpu_device_info(); + if (dev_info) { + dev_ctx = dev_info->default_context; + dev_ctx->Ref(); + } else { + dev_ctx = new DeviceContext; + } + input_dc.push_back(dev_ctx); + op_params.input_device_contexts = &input_dc; + op_params.op_device_context = dev_ctx; + AllocatorAttributes generic_alloc_attr; + op_params.output_attr_array = &generic_alloc_attr; + std::unique_ptr op = parent_->GetCollectiveGather( + col_params_, &input_tensor_, DEVICE_CPU, device_); + op_params.op_kernel = op.get(); + OpKernelContext ctx(&op_params, 1); + + // We never actually execute the kernel, so we need to do the output + // allocation it would do, ourselves. + Tensor* output_tensor_ptr = nullptr; + TensorShape output_shape({static_cast( + parent_->instances_.size() * input_tensor_.shape().num_elements())}); + TF_CHECK_OK(ctx.forward_input_or_allocate_output({0}, 0, output_shape, + &output_tensor_ptr)); + CHECK_EQ(output_tensor_ptr, ctx.mutable_output(0)); + // Prepare a RingGatherer instance. + string exec_key = + strings::StrCat(col_params_.instance.instance_key, ":0:0"); + RingGatherer gatherer; + CollectiveContext col_ctx(parent_->col_exec_, parent_->dev_mgr_.get(), + &ctx, &op_params, col_params_, exec_key, + kStepId, &input_tensor_, output_tensor_ptr); + TF_CHECK_OK(gatherer.InitializeCollectiveContext(&col_ctx)); + + // Run the all-gather. 
+ gatherer.Run([this](Status s) { status_ = s; }); + if (status_.ok()) { + CHECK(output_tensor_.CopyFrom(*ctx.mutable_output(0), + ctx.mutable_output(0)->shape())); + } + + dev_ctx->Unref(); + } + + const Tensor& input_tensor() { return input_tensor_; } + const Tensor& output_tensor() { return output_tensor_; } + + RingGathererTest* parent_; + string dev_name_; + DeviceType device_type_; + int rank_; + Tensor input_tensor_; + Tensor output_tensor_; + Device* device_; + CollectiveParams col_params_; + std::unique_ptr ca_; + std::unique_ptr ctx_; + Status status_; + }; + + bool stop_ = false; + DeviceType device_type_; + TestCollectiveExecutorMgr col_exec_mgr_; + CollectiveExecutor* col_exec_; + CollectiveRemoteAccessLocal* rma_; + std::unique_ptr dev_resolver_; + std::vector instances_; + CollectiveParams col_params_; + std::vector> gpu_devices_; + std::unique_ptr dev_mgr_; + std::unique_ptr gpu_ring_order_; + mutex mu_; + int32 gather_counter_ GUARDED_BY(mu_) = 0; +}; + +CollectiveParams SetUpCollectiveParams(const int num_devs_per_task, + const int num_tasks) { + CollectiveParams cp; + const int kNumDevs = num_devs_per_task * num_tasks; + cp.group.group_key = 1; + cp.group.group_size = kNumDevs; + cp.group.device_type = DeviceType("GPU"); + cp.group.num_tasks = num_tasks; + cp.instance.instance_key = 3; + cp.instance.type = GATHER_COLLECTIVE; + cp.instance.data_type = DataType(DT_FLOAT); + cp.instance.shape = TensorShape({kNumDevs * kNumDevs}); + cp.instance.impl_details.collective_name = "RingGather"; + cp.instance.impl_details.subdiv_offsets.push_back(0); + cp.is_source = false; + for (int i = 0; i < kNumDevs; ++i) { + int task_id = i / num_devs_per_task; + int dev_id = i % num_devs_per_task; + string task_name = strings::StrCat("/job:worker/replica:0/task:", task_id); + string device_name = strings::StrCat(task_name, "/device:GPU:", dev_id); + cp.instance.task_names.push_back(task_name); + cp.instance.device_names.push_back(device_name); + } + return cp; +} + 
+TEST_F(RingGathererTest, InitializeParams) { + const int kNumDevsPerTask = 8; + const int kNumTasks = 3; + CollectiveParams cp = SetUpCollectiveParams(kNumDevsPerTask, kNumTasks); + + cp.default_rank = 0; + cp.instance.impl_details.subdiv_offsets = {}; + RunSubdivPermsTest(&cp, {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}}, + {0}); + + cp.instance.impl_details.subdiv_offsets = {0}; + RunSubdivPermsTest(&cp, {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}}, + {0}); + + cp.default_rank = 3; + cp.instance.impl_details.subdiv_offsets = {}; + RunSubdivPermsTest(&cp, {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}}, + {3}); +} + +// TODO(b/113171733): change to use TEST_P. +#define DEF_TEST(B, T, W, D, S, L, A) \ + TEST_F(RingGathererTest, \ + DaTy##B##_DevTy##T##_Wkr##W##_Dev##D##_Sdiv##S##_Len##L##_Abrt##A) { \ + DataType dtype = DT_##B; \ + switch (dtype) { \ + case DT_FLOAT: { \ + RunTest(dtype, DEVICE_##T, W, D, S, L, A); \ + } break; \ + case DT_DOUBLE: { \ + RunTest(dtype, DEVICE_##T, W, D, S, L, A); \ + } break; \ + case DT_INT32: { \ + RunTest(dtype, DEVICE_##T, W, D, S, L, A); \ + } break; \ + case DT_INT64: { \ + RunTest(dtype, DEVICE_##T, W, D, S, L, A); \ + } break; \ + default: \ + LOG(FATAL) << "Unimplemented"; \ + } \ + } + +#ifndef GOOGLE_CUDA +// Success tests +DEF_TEST(FLOAT, CPU, 1, 2, 1, 1, 0) +DEF_TEST(FLOAT, CPU, 1, 2, 1, 2, 0) +DEF_TEST(FLOAT, CPU, 1, 2, 1, 8, 0) +DEF_TEST(FLOAT, CPU, 1, 2, 1, 16, 0) +DEF_TEST(FLOAT, CPU, 1, 2, 1, 1001, 0) +DEF_TEST(FLOAT, CPU, 2, 4, 1, 128, 0) +DEF_TEST(FLOAT, CPU, 2, 8, 1, 1001, 0) +DEF_TEST(FLOAT, CPU, 2, 8, 1, 4096, 0) +DEF_TEST(FLOAT, CPU, 2, 8, 1, 9408, 0) +DEF_TEST(FLOAT, CPU, 4, 4, 1, 32768, 0) +DEF_TEST(DOUBLE, CPU, 1, 2, 1, 1001, 0) +DEF_TEST(DOUBLE, CPU, 2, 8, 1, 4095, 0) +DEF_TEST(INT32, CPU, 1, 2, 1, 1001, 0) +DEF_TEST(INT32, CPU, 2, 8, 1, 4095, 0) +DEF_TEST(INT64, CPU, 1, 
2, 1, 1001, 0) +DEF_TEST(INT64, CPU, 2, 8, 1, 4095, 0) + +// Failure tests +DEF_TEST(FLOAT, CPU, 2, 8, 1, 9408, 1) +DEF_TEST(FLOAT, CPU, 2, 8, 1, 9408, 7) +DEF_TEST(FLOAT, CPU, 2, 8, 1, 9408, 11) +#endif + +#ifdef GOOGLE_CUDA +// GPU tests. So long as the device names are all in a single task we +// bypass inter-worker routing code and can fake multiple GPUs with a single +// GPU, from the perspective of the RingGatherer logic. So these tests +// are all single-worker. +DEF_TEST(FLOAT, GPU, 1, 2, 1, 1, 0) +DEF_TEST(FLOAT, GPU, 1, 2, 1, 2, 0) +DEF_TEST(FLOAT, GPU, 1, 2, 1, 8, 0) +DEF_TEST(FLOAT, GPU, 1, 2, 1, 16, 0) +DEF_TEST(FLOAT, GPU, 1, 2, 1, 1001, 0) +DEF_TEST(FLOAT, GPU, 1, 8, 1, 1001, 0) +DEF_TEST(FLOAT, GPU, 1, 8, 1, 4096, 0) +DEF_TEST(FLOAT, GPU, 1, 8, 1, 4095, 0) +DEF_TEST(FLOAT, GPU, 1, 8, 1, 32768, 0) +DEF_TEST(FLOAT, GPU, 1, 4, 1, 32768, 0) +DEF_TEST(DOUBLE, GPU, 1, 2, 1, 1001, 0) +// INT32 values are never on the GPU. +// DEF_TEST(INT32, GPU, 1, 1, 1, 1001, 0) +DEF_TEST(INT64, GPU, 1, 2, 1, 1001, 0) + +// Failure tests +DEF_TEST(FLOAT, GPU, 1, 8, 1, 9408, 2) +DEF_TEST(FLOAT, GPU, 1, 8, 1, 9408, 5) +#endif + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/ring_reducer.cc b/tensorflow/core/common_runtime/ring_reducer.cc index 8ed2fc2f1c..3328804cdf 100644 --- a/tensorflow/core/common_runtime/ring_reducer.cc +++ b/tensorflow/core/common_runtime/ring_reducer.cc @@ -39,212 +39,15 @@ limitations under the License. #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/types.h" -// Set true for greater intelligibility of debug mode log messages. -#define READABLE_KEYS false -// RingReduce algorithm exchanges chunks of tensor between devices. The chunk -// size depends on the number of subdivisions specified in the algorithm. 
If -// the user does not specify the number of subdivisions, we infer the number -// dynamically so that the resulting chunk size does not exceed -// kMaxChunkSizeBytes, empirically set at 4 MiB. -constexpr size_t kMaxChunkSizeBytes = (4 * 1024 * 1024); -// kMaxSubdivsPerDev is used to give an upper bound on the number of -// subdivisions dynamically generated. A reasonable value would be a small -// multiple of the number of NICs adjacent to each device. -constexpr int kMaxSubdivsPerDevice = 2; - namespace tensorflow { -namespace { -// Each CollectiveOp implementation is free to define its own -// BufRendezvous key format. This function produces the key used by -// RingReducer. -string RingReduceBufKey(const string& exec_key, int pass, int section, - int source_rank) { - if (READABLE_KEYS) { - return strings::StrCat("rred(", exec_key, "):pass(", pass, "):section(", - section, "):srcrank(", source_rank, ")"); - } else { - // TODO(b/78352018): Try out some kind of denser encoding, e.g. 128 bit - // hash. 
- return strings::StrCat(exec_key, ":", pass, ":", section, ":", source_rank); - } -} - -} // namespace - -void RingReducer::PCQueue::Enqueue(RingField* rf) { - mutex_lock l(pcq_mu_); - deque_.push_back(rf); - if (waiter_count_ > 0) { - cv_.notify_one(); - } -} - -RingReducer::RingField* RingReducer::PCQueue::Dequeue() { - mutex_lock l(pcq_mu_); - if (deque_.empty()) { - ++waiter_count_; - while (deque_.empty()) { - cv_.wait(l); - } - --waiter_count_; - } - RingField* rf = deque_.front(); - deque_.pop_front(); - return rf; -} - -RingReducer::RingReducer() - : col_ctx_(nullptr), - col_params_(nullptr), - done_(nullptr), - group_size_(-1), - num_subdivs_(-1) {} RingReducer::~RingReducer() { group_size_tensor_ready_.WaitForNotification(); } -Status GenerateSubdivsInCollectiveParams(CollectiveParams* col_params) { - if (col_params->instance.shape.num_elements() == 0) { - return errors::Internal("shape in CollectiveParams should be non-empty"); - } - const int kAvgDevPerTask = - col_params->group.group_size / col_params->group.num_tasks; - const int kMaxNumSubdivs = kMaxSubdivsPerDevice * kAvgDevPerTask; - if (kMaxNumSubdivs <= 0) { - return errors::Internal("Unexpected kMaxNumSubdivs ", kMaxNumSubdivs, - " in RingReducer"); - } - // NOTE(ayushd): If no subdiv_offsets have been specified, dynamically add - // as many offsets as needed so that the size of tensor chunks <= - // kMaxChunkSizeBytes. Empirically, chunks that are too small or too large - // lead to worse performance. 
- int num_subdivs = 0; - const size_t tensor_size = col_params->instance.shape.num_elements() * - DataTypeSize(col_params->instance.data_type); - size_t chunk_size; - do { - ++num_subdivs; - int num_chunks = col_params->group.group_size * num_subdivs; - chunk_size = tensor_size / num_chunks; - VLOG(2) << "num_subdivs " << num_subdivs << " num_chunks " << num_chunks - << " chunk_size " << chunk_size; - } while (chunk_size > kMaxChunkSizeBytes && num_subdivs < kMaxNumSubdivs); - if (num_subdivs <= 0) { - return errors::Internal("Unexpected num_subdivs ", num_subdivs, - " in RingReducer"); - } - - int subdiv_stride = kAvgDevPerTask / num_subdivs; - if (subdiv_stride == 0) subdiv_stride = 1; - col_params->instance.impl_details.subdiv_offsets.reserve(num_subdivs); - for (int sdi = 0; sdi < num_subdivs; ++sdi) { - int subdiv_offset = subdiv_stride * sdi; - if (sdi % 2 == 1) subdiv_offset *= -1; - col_params->instance.impl_details.subdiv_offsets.push_back(subdiv_offset); - } - - if (VLOG_IS_ON(2)) { - string subdiv_buf; - for (const int subdiv_offset : - col_params->instance.impl_details.subdiv_offsets) { - strings::StrAppend(&subdiv_buf, " ", subdiv_offset); - } - VLOG(2) << "Dynamically generated " << num_subdivs - << " subdiv_offsets:" << subdiv_buf << " tensor_size " - << tensor_size << " chunk_size " << chunk_size; - } - - return Status::OK(); -} - Status RingReducer::InitializeCollectiveParams(CollectiveParams* col_params) { // TODO(b/113171733): change CHECKs to return errors. CHECK_EQ(col_params->instance.type, REDUCTION_COLLECTIVE); CHECK_EQ(col_params->instance.impl_details.collective_name, "RingReduce"); - const string& device_name = - col_params->instance.device_names[col_params->default_rank]; - // Each subdiv permutation is a ring formed by rotating each - // single-task subsequence of devices by an offset. 
This makes most - // sense when each task has the same number of devices but we can't - // depend on that being the case so we'll compute something that - // works in any case. - - // Start by counting the devices in each task. - // Precondition: device_names must be sorted so that all devices in - // the same task are adjacent. - VLOG(2) << "Sorted task names: " - << str_util::Join(col_params->instance.task_names, ", "); - std::vector dev_per_task; - const string* prior_task_name = &col_params->instance.task_names[0]; - int dev_count = 1; - for (int di = 1; di < col_params->group.group_size; ++di) { - if (col_params->instance.task_names[di] != *prior_task_name) { - dev_per_task.push_back(dev_count); - dev_count = 1; - prior_task_name = &col_params->instance.task_names[di]; - } else { - ++dev_count; - } - } - dev_per_task.push_back(dev_count); - CHECK_EQ(col_params->group.num_tasks, dev_per_task.size()); - - if (col_params->instance.impl_details.subdiv_offsets.empty()) { - TF_RETURN_IF_ERROR(GenerateSubdivsInCollectiveParams(col_params)); - } - - // Generate a ring permutation for requested offset. - VLOG(2) << "Setting up perms for col_params " << col_params - << " subdiv_permutations " - << &col_params->instance.impl_details.subdiv_permutations; - col_params->instance.impl_details.subdiv_permutations.resize( - col_params->instance.impl_details.subdiv_offsets.size()); - col_params->subdiv_rank.resize( - col_params->instance.impl_details.subdiv_offsets.size(), -1); - for (int sdi = 0; - sdi < col_params->instance.impl_details.subdiv_offsets.size(); ++sdi) { - std::vector& perm = - col_params->instance.impl_details.subdiv_permutations[sdi]; - CHECK_EQ(perm.size(), 0); - int offset = col_params->instance.impl_details.subdiv_offsets[sdi]; - // A negative subdivision offset is interpreted as follows: - // 1. Reverse the local device ordering. - // 2. Begin the subdivision at abs(offset) in the reversed ordering. 
- bool reverse = false; - if (offset < 0) { - offset = abs(offset); - reverse = true; - } - int prior_dev_count = 0; // sum over prior worker device counts - for (int ti = 0; ti < col_params->group.num_tasks; ++ti) { - for (int di = 0; di < dev_per_task[ti]; ++di) { - int di_offset = (di + offset) % dev_per_task[ti]; - int offset_di = - reverse ? (dev_per_task[ti] - (di_offset + 1)) : di_offset; - // Device index in global subdivision permutation. - int permuted_di = prior_dev_count + offset_di; - int rank = static_cast(perm.size()); - perm.push_back(permuted_di); - if (col_params->instance.device_names[permuted_di] == device_name) { - CHECK_EQ(permuted_di, col_params->default_rank); - col_params->subdiv_rank[sdi] = rank; - } - } - prior_dev_count += dev_per_task[ti]; - } - CHECK_EQ(col_params->group.group_size, perm.size()); - } - - VLOG(2) << collective_util::SubdivPermDebugString(*col_params); - return Status::OK(); -} - -Status RingReducer::InitializeCollectiveContext(CollectiveContext* col_ctx) { - CHECK(col_ctx->dev_mgr); - col_ctx_ = col_ctx; - col_params_ = &col_ctx->col_params; - return collective_util::InitializeDeviceAndLocality( - col_ctx->dev_mgr, col_ctx->device_name, &col_ctx->device, - &col_ctx->device_locality); + return RingAlg::InitializeCollectiveParams(col_params); } void RingReducer::Run(StatusCallback done) { @@ -303,25 +106,6 @@ void RingReducer::Run(StatusCallback done) { ContinueAfterInputCopy(); } -string RingReducer::TensorDebugString(const Tensor& tensor) { - const DeviceBase::GpuDeviceInfo* gpu_device_info = - col_ctx_->op_ctx->device()->tensorflow_gpu_device_info(); - if (gpu_device_info) { - Tensor cpu_tensor(tensor.dtype(), tensor.shape()); - Notification note; - gpu_device_info->default_context->CopyDeviceTensorToCPU( - &tensor, "" /*tensor_name*/, col_ctx_->device, &cpu_tensor, - [&note](const Status& s) { - CHECK(s.ok()); - note.Notify(); - }); - note.WaitForNotification(); - return cpu_tensor.SummarizeValue(64); - } else { - return 
tensor.SummarizeValue(64); - } -} - // Note that this function is blocking and must not run in any thread // which cannot be blocked. void RingReducer::ContinueAfterInputCopy() { @@ -358,170 +142,16 @@ void RingReducer::ContinueAfterInputCopy() { Finish(RunAsyncParts()); } -void RingReducer::StartAbort(const Status& s) { - // In abort mode we stop issuing additional ProvideBuf - // and ConsumeBuf calls, but we need to wait for all of the - // outstanding callbacks to be invoked before quitting. - bool abort_started = false; - { - mutex_lock l(status_mu_); - if (status_.ok()) { - LOG(ERROR) << "Aborting RingReduce with " << s; - abort_started = true; - status_.Update(s); - } - } - // If this is the initial entry to abort mode then invoke StartAbort - // on the CollectiveExecutor that invoked us. That should start - // cancellation on all of the outstanding CollectiveRemoteAccess - // actions. - if (abort_started) { - col_ctx_->col_exec->StartAbort(s); - } -} - -void RingReducer::Finish(bool ok) { - if (ok) { - // Recover the output from the adaptor. - ca_->ConsumeFinalValue(col_ctx_->output); - } - Status s; - { - mutex_lock l(status_mu_); - s = status_; - } - rfv_.clear(); // Give up Refs on output tensor. - done_(s); -} - -// At the beginning of the algorithm initialize a RingField struct for -// every independent field of the tensor. void RingReducer::InitRingField(RingField* rf, int chunk_idx, int subdiv_idx, int field_idx) { - // Note on field indexing: There are group_size_ devices in the - // instance, implying the same number of chunks per tensor, where a - // chunk is the unit of data transferred in a time step. However, if - // a device can simultaneously send data by 2 or more independent - // channels we can speed up the transfer by subdividing chunks and - // processing multiple subdivisions at once. So the actual number - // of RingFields is group_size_ * num_subdivs_. 
- DCHECK_EQ(field_idx, (chunk_idx * num_subdivs_) + subdiv_idx); - rf->chunk_idx = chunk_idx; - rf->subdiv_idx = subdiv_idx; - rf->sc_idx = field_idx; - rf->rank = col_params_->subdiv_rank[subdiv_idx]; - rf->second_pass = false; - rf->action = RF_INIT; - // Recv from the device with preceding rank within the subdivision. - int recv_from_rank = (rf->rank + (group_size_ - 1)) % group_size_; - int send_to_rank = (rf->rank + 1) % group_size_; - rf->recv_dev_idx = col_params_->instance.impl_details - .subdiv_permutations[subdiv_idx][recv_from_rank]; - int send_dev_idx = col_params_->instance.impl_details - .subdiv_permutations[subdiv_idx][send_to_rank]; - rf->recv_is_remote = !col_params_->task.is_local[rf->recv_dev_idx]; - rf->send_is_remote = !col_params_->task.is_local[send_dev_idx]; - if (ca_->ChunkBytes(rf->sc_idx) > 0) { - // In pass 0 we skip Recv when rank = chunk_idx - rf->do_recv = (rf->chunk_idx != rf->rank); - // In pass 0 we skip Send when rank = chunk_idx-1 - rf->do_send = - (rf->rank != ((rf->chunk_idx + (group_size_ - 1)) % group_size_)); - } - rf->is_final = - (rf->rank == ((rf->chunk_idx + (group_size_ - 1)) % group_size_)); - if (rf->do_send || rf->do_recv) { - rf->chunk = ca_->ChunkAlias(rf->sc_idx); - CHECK(rf->chunk.IsAligned()) << rf->DebugString(); - } + RingAlg::InitRingField(rf, chunk_idx, subdiv_idx, field_idx); if (rf->do_recv) { rf->tmp_chunk = ca_->TempChunk(rf->sc_idx); - CHECK(rf->tmp_chunk.IsAligned()) << rf->DebugString(); - } - VLOG(2) << this << " InitRingField " << rf->DebugString() << " chunk " - << ca_->TBounds(rf->chunk); -} - -// When a RingField transitions from first to second recompute the -// do_send and do_recv values. -void RingReducer::AdvanceToSecondPass(RingField* rf) { - VLOG(3) << "IncrRingField old value " << rf->DebugString(); - CHECK(!rf->second_pass); - rf->second_pass = true; - rf->action = RF_INIT; - if (ca_->ChunkBytes(rf->sc_idx) > 0) { - // In pass 1 the send/no-send boundary moves down 1 place. 
- rf->do_recv = - (rf->rank != ((rf->chunk_idx + (group_size_ - 1)) % group_size_)); - rf->do_send = - (rf->rank != ((rf->chunk_idx + (group_size_ - 2)) % group_size_)); - } - rf->is_final = - (rf->rank == ((rf->chunk_idx + (group_size_ - 2)) % group_size_)); - VLOG(3) << "IncrRingField new value " << rf->DebugString(); -} - -string RingReducer::RingField::DebugString() const { - string rv = strings::StrCat("RingField rank=", rank, " chunk_idx=", chunk_idx, - " subdiv=", subdiv_idx, " sc_idx=", sc_idx, - " action=", action); - strings::StrAppend(&rv, " pass=", second_pass); - strings::StrAppend(&rv, " do_send=", do_send, " do_recv=", do_recv, - " is_final=", is_final, " recv_is_remote=", recv_is_remote, - " recv_dev_idx=", recv_dev_idx, " sc_idx=", sc_idx); - return rv; -} - -void RingReducer::DispatchSend(RingField* rf, const StatusCallback& done) { - CHECK(rf->do_send); - string send_buf_key = RingReduceBufKey(col_ctx_->exec_key, rf->second_pass, - rf->sc_idx, rf->rank); - VLOG(3) << "DispatchSend rank=" << col_params_->default_rank << " send key " - << send_buf_key << " chunk " << ca_->TBounds(rf->chunk) << " sc_idx " - << rf->sc_idx; - int send_to_rank = (rf->rank + 1) % group_size_; - int send_to_dev_idx = col_params_->instance.impl_details - .subdiv_permutations[rf->subdiv_idx][send_to_rank]; - col_ctx_->col_exec->PostToPeer( - col_params_->instance.device_names[send_to_dev_idx], - col_params_->instance.task_names[send_to_dev_idx], send_buf_key, - col_ctx_->device, col_ctx_->op_ctx->op_device_context(), - col_ctx_->op_ctx->output_alloc_attr(0), &rf->chunk, - col_ctx_->device_locality, done); -} - -void RingReducer::DispatchRecv(RingField* rf, const StatusCallback& done) { - CHECK(rf->do_recv); - string recv_buf_key = - RingReduceBufKey(col_ctx_->exec_key, rf->second_pass, rf->sc_idx, - (rf->rank + (group_size_ - 1)) % group_size_); - VLOG(3) << "DispatchRecv rank=" << col_params_->default_rank << " recv key " - << recv_buf_key << " chunk " << 
ca_->TBounds(rf->chunk) << " into " - << ((col_params_->merge_op != nullptr) ? "tmp_chunk" : "chunk"); - Tensor* dst_tensor = (!rf->second_pass && (col_params_->merge_op != nullptr)) - ? &rf->tmp_chunk - : &rf->chunk; - col_ctx_->col_exec->RecvFromPeer( - col_params_->instance.device_names[rf->recv_dev_idx], - col_params_->instance.task_names[rf->recv_dev_idx], - col_params_->task.is_local[rf->recv_dev_idx], recv_buf_key, - col_ctx_->device, col_ctx_->op_ctx->op_device_context(), - col_ctx_->op_ctx->output_alloc_attr(0), dst_tensor, - col_ctx_->device_locality, rf->subdiv_idx, done); -} - -string RingReducer::FieldState() { - string s = strings::StrCat( - "RingReducer ", strings::Hex(reinterpret_cast(this)), " exec ", - col_ctx_->exec_key, " step_id=", col_ctx_->step_id, " state of all ", - rfv_.size(), " fields:"); - for (int i = 0; i < rfv_.size(); ++i) { - s.append("\n"); - s.append(rfv_[i].DebugString()); } - return s; } +// At the beginning of the algorithm initialize a RingField struct for +// every independent field of the tensor. bool RingReducer::RunAsyncParts() { // This function orchestrates RingReduce actions on behalf of a // single device. It is entered by a blockable thread that diff --git a/tensorflow/core/common_runtime/ring_reducer.h b/tensorflow/core/common_runtime/ring_reducer.h index a5aa8fad70..a681fabd2b 100644 --- a/tensorflow/core/common_runtime/ring_reducer.h +++ b/tensorflow/core/common_runtime/ring_reducer.h @@ -21,108 +21,36 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/base_collective_executor.h" +#include "tensorflow/core/common_runtime/ring_alg.h" #include "tensorflow/core/framework/collective.h" namespace tensorflow { class Device; // Ring-algorithm implementation of collective all-reduce. 
-class RingReducer : public CollectiveImplementationInterface { +class RingReducer : public RingAlg { public: - RingReducer(); + RingReducer() : RingAlg(REDUCTION_COLLECTIVE, "Reduce") {} ~RingReducer() override; - // Establishes the requested number of subdivision permutations based on the - // ring order implicit in the device order. - Status InitializeCollectiveParams(CollectiveParams* col_params) override; - - // Initializes members of CollectiveContext not yet initialized, i.e. device - // and device_locality. Also saves the CollectiveContext in this object. - Status InitializeCollectiveContext(CollectiveContext* col_ctx) override; - - // No-op for ring reducer. - Status InitializeInstanceBeforeGroupDiscovery(CollectiveParams*) override { - return Status::OK(); - } - // Begins async execution of the ring reduce algorithm. // Must be called in a blockable thread. // TODO(b/80529858): remove the previous warning when we have a dedicated // collective threadpool. void Run(StatusCallback done) override; - private: - // Called when a bad status is received that implies we should terminate - // execution and return a bad status. - void StartAbort(const Status& s); - void ContinueAfterInputCopy(); - void Finish(bool ok); - bool RunAsyncParts(); - - // Current status of a RingField - enum RingFieldAction { - RF_INIT = 0, // Just initialized for a pass - RF_RECV, // Recv pending - RF_REDUCE, // Reduce pending - RF_FINALIZE, // FinalOp pending - RF_SEND_READY, // Ready to send - RF_SEND, // Send pending - RF_DONE, // No more work - }; + Status InitializeCollectiveParams(CollectiveParams* col_params) override; - // Tracks progress of actions on a single subfield of the entire tensor. 
- struct RingField { - int16 chunk_idx; // major division index - int16 subdiv_idx; // minor division index - int16 sc_idx; // subchunk index - int16 rank; // rank within subdiv permutation - int16 recv_dev_idx; // dev from which value should be recv'd - RingFieldAction action; - bool second_pass; - bool recv_is_remote = false; - bool send_is_remote = false; - bool do_send = false; // is the value sent in this pass? - bool do_recv = false; // is the value recv'd in this pass? - bool is_final = false; // is the last field in the pass for this rank - Tensor chunk; // alias to field values - Tensor tmp_chunk; - Status status; - string DebugString() const; - }; - void AdvanceToSecondPass(RingField* rf); + protected: void InitRingField(RingField* rf, int chunk_idx, int subdiv_idx, - int field_idx); - void DispatchSend(RingField* rf, const StatusCallback& done); - void DispatchRecv(RingField* rf, const StatusCallback& done); - - // For constructing log messages for debugging. - string FieldState(); - string TensorDebugString(const Tensor& tensor); - - // Producer/Consumer Queue of RingField structs. 
- class PCQueue { - public: - void Enqueue(RingField* rf); - RingField* Dequeue(); + int field_idx) override; - private: - mutex pcq_mu_; - condition_variable cv_; - int waiter_count_ GUARDED_BY(pcq_mu_) = 0; - std::deque deque_ GUARDED_BY(pcq_mu_); - }; + private: + void ContinueAfterInputCopy(); + bool RunAsyncParts(); - CollectiveContext* col_ctx_; // Not owned - const CollectiveParams* col_params_; // Not owned - StatusCallback done_; - int group_size_; - int num_subdivs_; Tensor group_size_tensor_; Notification group_size_tensor_ready_; - std::unique_ptr ca_; - mutex status_mu_; - Status status_ GUARDED_BY(status_mu_); - std::vector rfv_; friend class RingReducerTest; }; diff --git a/tensorflow/core/common_runtime/ring_reducer_test.cc b/tensorflow/core/common_runtime/ring_reducer_test.cc index 7feb29a6db..7f18cdb5e2 100644 --- a/tensorflow/core/common_runtime/ring_reducer_test.cc +++ b/tensorflow/core/common_runtime/ring_reducer_test.cc @@ -335,19 +335,20 @@ class RingReducerTest : public ::testing::Test { note.WaitForNotification(); } + auto alias = actual.template unaligned_flat(); for (int i = 0; i < tensor_len; ++i) { switch (dtype) { case DT_FLOAT: - EXPECT_FLOAT_EQ(expected[i], actual.template flat()(i)) + EXPECT_FLOAT_EQ(expected[i], alias(i)) << "Mismatch at device " << di << " index " << i; break; case DT_DOUBLE: - EXPECT_DOUBLE_EQ(expected[i], actual.template flat()(i)) + EXPECT_DOUBLE_EQ(expected[i], alias(i)) << "Mismatch at device " << di << " index " << i; break; case DT_INT32: case DT_INT64: - EXPECT_EQ(expected[i], actual.template flat()(i)) + EXPECT_EQ(expected[i], alias(i)) << "Mismatch at device " << di << " index " << i; break; default: diff --git a/tensorflow/core/framework/collective.h b/tensorflow/core/framework/collective.h index 546e3938a8..e00cc17961 100644 --- a/tensorflow/core/framework/collective.h +++ b/tensorflow/core/framework/collective.h @@ -42,6 +42,7 @@ class Tensor; enum CollectiveType { REDUCTION_COLLECTIVE = 0, 
BROADCAST_COLLECTIVE, + GATHER_COLLECTIVE, UNDEFINED_COLLECTIVE, }; diff --git a/tensorflow/core/kernels/collective_ops.cc b/tensorflow/core/kernels/collective_ops.cc index 56843eb773..23356283bb 100644 --- a/tensorflow/core/kernels/collective_ops.cc +++ b/tensorflow/core/kernels/collective_ops.cc @@ -68,6 +68,57 @@ class CollectiveOpKernel : public AsyncOpKernel { std::vector dependencies_; }; +class CollectiveGatherOpKernel : public CollectiveOpKernel { + public: + explicit CollectiveGatherOpKernel(OpKernelConstruction* c) + : CollectiveOpKernel(c) { + col_params_.instance.type = GATHER_COLLECTIVE; + OP_REQUIRES_OK(c, c->GetAttr("group_size", &col_params_.group.group_size)); + OP_REQUIRES_OK(c, c->GetAttr("group_key", &col_params_.group.group_key)); + OP_REQUIRES_OK( + c, c->GetAttr("instance_key", &col_params_.instance.instance_key)); + OP_REQUIRES_OK(c, c->GetAttr("T", &col_params_.instance.data_type)); + OP_REQUIRES_OK(c, c->GetAttr("shape", &col_params_.instance.shape)); + const NodeDef& real_node = c->def(); + col_params_.name = strings::StrCat(real_node.name(), ": Gather"); + col_params_.group.device_type = c->device_type(); + } + + void ComputeAsync(OpKernelContext* c, DoneCallback done) override { + CollectiveExecutor* col_exec = c->collective_executor(); + OP_REQUIRES_ASYNC( + c, col_exec, + errors::Internal( + "Failed to get CollectiveExecutor from OpKernelContext for Op ", + col_params_.name), + done); + // Allocate output on the first pass through this function. This must be + // done immediately, while we're still in the executor thread. Otherwise + // the memory is not guaranteed to be unused by any concurrently executing + // GPU kernel. + if (c->mutable_output(0) == nullptr) { + // Allocate the output tensor. 
+ Tensor* output = nullptr; + OP_REQUIRES_OK_ASYNC( + c, c->allocate_output(0, col_params_.instance.shape, &output), done); + } + if (!CanProceedWithCompute(c, col_exec, done)) return; + auto actual_done = [c, done](const Status& s) { + OP_REQUIRES_OK_ASYNC(c, s, done); + done(); + }; + col_exec->ExecuteAsync(c, col_params_, GetCollectiveKey(c), actual_done); + } + + private: + TF_DISALLOW_COPY_AND_ASSIGN(CollectiveGatherOpKernel); +}; + +REGISTER_KERNEL_BUILDER(Name("CollectiveGather").Device(DEVICE_CPU), + CollectiveGatherOpKernel); +REGISTER_KERNEL_BUILDER(Name("CollectiveGather").Device(DEVICE_GPU), + CollectiveGatherOpKernel); + class CollectiveReduceOpKernel : public CollectiveOpKernel { public: explicit CollectiveReduceOpKernel(OpKernelConstruction* c) diff --git a/tensorflow/core/ops/collective_ops.cc b/tensorflow/core/ops/collective_ops.cc index e45a8a9b36..06e5f14de7 100644 --- a/tensorflow/core/ops/collective_ops.cc +++ b/tensorflow/core/ops/collective_ops.cc @@ -32,6 +32,17 @@ REGISTER_OP("CollectiveReduce") .SetIsStateful() .SetShapeFn(shape_inference::UnchangedShape); +REGISTER_OP("CollectiveGather") + .Input("input: T") + .Output("data: T") + .Attr("T: {float, float16, float64, int32, int64}") + .Attr("group_size: int") + .Attr("group_key: int") + .Attr("instance_key: int") + .Attr("shape: shape") + .SetIsStateful() + .SetShapeFn(shape_inference::ExplicitShape); + REGISTER_OP("CollectiveBcastSend") .Input("input: T") .Output("data: T") diff --git a/tensorflow/python/framework/auto_control_deps.py b/tensorflow/python/framework/auto_control_deps.py index 6210010cff..437c6abbff 100644 --- a/tensorflow/python/framework/auto_control_deps.py +++ b/tensorflow/python/framework/auto_control_deps.py @@ -32,6 +32,7 @@ from tensorflow.python.util import tf_decorator # Op types that should not run in program order, e.g. because they need to run # asynchronously to avoid deadlock. 
ASYNC_STATEFUL_OPS = [ + "CollectiveGather", "CollectiveReduce", "CollectiveBcastSend", "CollectiveBcastRecv", diff --git a/tensorflow/python/ops/collective_ops.py b/tensorflow/python/ops/collective_ops.py index 98668facd5..32a71fc25d 100644 --- a/tensorflow/python/ops/collective_ops.py +++ b/tensorflow/python/ops/collective_ops.py @@ -48,7 +48,7 @@ def all_reduce(t, group_size, group_key, instance_key, merge_op, final_op, if not device.canonical_name(t.device): raise ValueError('Device assignment required for collective ops') if group_size <= 1: - raise ValueError('Parameter group_size to add_reduce must be at least 2.') + raise ValueError('Parameter group_size to all_reduce must be at least 2.') return gen_collective_ops.collective_reduce(t, group_size=group_size, group_key=group_key, @@ -58,6 +58,35 @@ def all_reduce(t, group_size, group_key, instance_key, merge_op, final_op, subdiv_offsets=subdiv_offsets) +def all_gather(t, group_size, group_key, instance_key): + """Accumulates tensors collectively, across devices, along first dimension. + + Args: + t: the tensor to participate in the accumulation. + group_size: the total number of tensors to be collectively accumulated. + Each must reside on a different device. + group_key: an integer identifying the group of devices. + instance_key: an integer identifying the participating group of Ops. + + Returns: + An Op implementing the distributed operation. + + Raises: + ValueError: if any of the input parameter constraints are not met. 
+ """ + if not device.canonical_name(t.device): + raise ValueError('Device assignment required for collective ops') + if group_size <= 1: + raise ValueError('Parameter group_size to all_gather must be at least 2.') + dims = t.shape.as_list() + output_shape = [dims[0] * group_size] + dims[1:] + return gen_collective_ops.collective_gather(t, + shape=output_shape, + group_size=group_size, + group_key=group_key, + instance_key=instance_key) + + def broadcast_send(t, shape, dtype, group_size, group_key, instance_key): """Broadcasts one tensor to a group of others, across devices. diff --git a/tensorflow/python/ops/collective_ops_test.py b/tensorflow/python/ops/collective_ops_test.py index 9c5a39b90e..c9b376caf8 100644 --- a/tensorflow/python/ops/collective_ops_test.py +++ b/tensorflow/python/ops/collective_ops_test.py @@ -4,7 +4,7 @@ # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -25,8 +25,6 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import collective_ops from tensorflow.python.platform import test -# TODO(tucker): Make these ops work in eager mode. 
b/79776476 - class CollectiveOpTest(test.TestCase): @@ -114,6 +112,42 @@ class CollectiveOpTest(test.TestCase): def testCollectiveBroadcast(self): self._testCollectiveBroadcast([0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1]) + def _testCollectiveGather(self, t0, t1, expected, set_graph_key): + group_key = 1 + instance_key = 1 + with self.session( + config=config_pb2.ConfigProto(device_count={'CPU': 2})) as sess: + with ops.device('/CPU:0'): + in0 = constant_op.constant(t0) + colred0 = collective_ops.all_gather(in0, 2, group_key, instance_key) + with ops.device('/CPU:1'): + in1 = constant_op.constant(t1) + colred1 = collective_ops.all_gather(in1, 2, group_key, instance_key) + run_options = config_pb2.RunOptions() + if set_graph_key: + run_options.experimental.collective_graph_key = 1 + results = sess.run([colred0, colred1], options=run_options) + self.assertAllClose(results[0], expected, rtol=1e-5, atol=1e-5) + self.assertAllClose(results[1], expected, rtol=1e-5, atol=1e-5) + + @test_util.run_deprecated_v1 + def testCollectiveGather(self): + self._testCollectiveGather([0, 1, 2, 3, 4, 5, 6, 7], + [10, 11, 12, 13, 14, 15, 16, 17], + [0, 1, 2, 3, 4, 5, 6, 7, + 10, 11, 12, 13, 14, 15, 16, 17], + True) + self._testCollectiveGather([[0, 1, 2, 3], [4, 5, 6, 7]], + [[10, 11, 12, 13], [14, 15, 16, 17]], + [[0, 1, 2, 3], [4, 5, 6, 7], + [10, 11, 12, 13], [14, 15, 16, 17]], + True) + self._testCollectiveGather([[[0, 1], [2, 3]], [[4, 5], [6, 7]]], + [[[10, 11], [12, 13]], [[14, 15], [16, 17]]], + [[[0, 1], [2, 3]], [[4, 5], [6, 7]], + [[10, 11], [12, 13]], [[14, 15], [16, 17]]], + True) + if __name__ == '__main__': test.main() diff --git a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt index 144d564472..4bad3d51af 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.raw_ops.pbtxt @@ -572,6 +572,10 @@ tf_module { name: "CollectiveBcastSend" argspec: 
"args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "CollectiveGather" + argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'shape\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "CollectiveReduce" argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'merge_op\', \'final_op\', \'subdiv_offsets\', \'wait_for\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt index 144d564472..4bad3d51af 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.raw_ops.pbtxt @@ -572,6 +572,10 @@ tf_module { name: "CollectiveBcastSend" argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'shape\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "CollectiveGather" + argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'shape\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "CollectiveReduce" argspec: "args=[\'input\', \'group_size\', \'group_key\', \'instance_key\', \'merge_op\', \'final_op\', \'subdiv_offsets\', \'wait_for\'], varargs=None, keywords=None, defaults=None" -- GitLab From fb4a5700b0342b5240ad54ec23abe1d254d97e3b Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 13 Feb 2019 10:22:51 -0800 Subject: [PATCH 046/351] [XLA:Python] Don't require a Backend to call ComputationBuilder.Build(), instead require it on Computation.Compile(). XLA computations can be built irrespective of what backends exist ? ultimately they are building a protocol buffer and we do not need a backend or XLA client of any kind to build one. 
Leave code to forward a backend from Build() to Compile(); this can be removed after JAX is updated to pass the backend to Compile(). PiperOrigin-RevId: 233779679 --- tensorflow/compiler/xla/python/xla_client.py | 41 +++++++++++++------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index e8379a677a..fd65c9dc71 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -50,7 +50,7 @@ from tensorflow.compiler.xla.service import hlo_pb2 # which case we need to be able to detect when incompatible versions are # installed. def version(): - return (0, 1, 7) + return (0, 1, 8) _OP_METADATA_FIELDS = [ @@ -630,15 +630,15 @@ def transfer_from_outfeed(shape, replica_number=None): class Computation(object): - """Python wrapper for a local XLA Computation. + """Python wrapper for an XLA Computation. - A Computation can be compiled to form an Executable. Otherwise, it - can still be used as a Computation where required by the - ComputationBuilder methods. + A Computation can be compiled to form an Executable, or used as a + subcomputation in ComputationBuilder methods. """ - def __init__(self, c_computation, backend=XLA_LOCAL_BACKEND): + def __init__(self, c_computation, backend=None): self._c_computation = c_computation + # The backend argument is deprecated. Pass a backend to Compile() instead. self._backend = backend @property @@ -655,7 +655,8 @@ class Computation(object): proto = hlo_pb2.HloModuleProto.FromString(serialized) return proto - def Compile(self, argument_shapes=(), compile_options=None, layout_fn=None): + def Compile(self, argument_shapes=(), compile_options=None, layout_fn=None, + backend=None): """Compiles a computation. Computations are the result of a "ComputationBuild'ing" process. 
@@ -667,10 +668,12 @@ class Computation(object): compile_options: options to use for compilation, includes an optional laid out result shape for the computation. layout_fn: lambda that is used to lay out the argument/result shapes. + backend: a `Backend` for which an executable should be generated. Returns: A Executable instance. """ + backend = backend or self._backend or XLA_LOCAL_BACKEND result_shape = _wrap_shape(self.computation.GetReturnValueShape()) if layout_fn: @@ -683,18 +686,19 @@ class Computation(object): compile_options = compile_options or CompileOptions() compile_options.result_shape = result_shape - c = self._backend.compile(self.computation, argument_shapes, - compile_options) - return Executable(c, backend=self._backend) + c = backend.compile(self.computation, argument_shapes, compile_options) + return Executable(c, backend=backend) def CompileWithExampleArguments(self, arguments=(), compile_options=None, - layout_fn=None): + layout_fn=None, + backend=None): return self.Compile( argument_shapes=[Shape.from_pyval(arg) for arg in arguments], compile_options=compile_options, - layout_fn=layout_fn) + layout_fn=layout_fn, + backend=backend) def GetProgramShape(self): (arg_shapes, result_shape) = self._c_computation.GetProgramShape() @@ -714,7 +718,7 @@ class Computation(object): class Executable(object): """Python wrapper for an XLA Executable.""" - def __init__(self, c_executable, backend=XLA_LOCAL_BACKEND): + def __init__(self, c_executable, backend=None): self._c_executable = c_executable self._backend = backend @@ -818,7 +822,16 @@ class ComputationBuilder(object): self._client = c_api.ComputationBuilder(name.encode('utf8')) self._parameter_numbering = itertools.count() - def Build(self, root=None, backend=XLA_LOCAL_BACKEND): + def Build(self, root=None, backend=None): + """Builds a `Computation` from the contents of the builder. + + Args: + root: if not None, the operator containing the return value of the + computation. + backend: deprecated. 
Pass a `backend` to `Computation.Compile` instead. + Returns: + A `Computation`. + """ if root is not None: return Computation(self._client.BuildWithRoot(root), backend=backend) else: -- GitLab From 44c50ed1801e74500b493a156cbada712fb3806a Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 13 Feb 2019 10:27:05 -0800 Subject: [PATCH 047/351] Support unaligned DT_STRING and DT_VARIANT tensors in `tensor::DeepCopy()`. PiperOrigin-RevId: 233780534 --- tensorflow/core/framework/tensor_util.cc | 4 +- tensorflow/core/framework/tensor_util_test.cc | 65 +++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/tensor_util.cc b/tensorflow/core/framework/tensor_util.cc index 65f6dc1c00..d6e2224ca3 100644 --- a/tensorflow/core/framework/tensor_util.cc +++ b/tensorflow/core/framework/tensor_util.cc @@ -37,10 +37,10 @@ Tensor DeepCopy(const Tensor& other) { other_data.size()); } } else if (other.dtype() == DT_STRING) { - tmp.flat() = other.flat(); + tmp.unaligned_flat() = other.unaligned_flat(); } else { CHECK_EQ(DT_VARIANT, other.dtype()); - tmp.flat() = other.flat(); + tmp.unaligned_flat() = other.unaligned_flat(); } return tmp; } diff --git a/tensorflow/core/framework/tensor_util_test.cc b/tensorflow/core/framework/tensor_util_test.cc index 2b4e1cad2f..c8545bca23 100644 --- a/tensorflow/core/framework/tensor_util_test.cc +++ b/tensorflow/core/framework/tensor_util_test.cc @@ -18,6 +18,9 @@ limitations under the License. 
#include #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/variant.h" +#include "tensorflow/core/framework/variant_encode_decode.h" +#include "tensorflow/core/framework/variant_tensor_data.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" @@ -145,6 +148,68 @@ TEST(TensorUtil, DeepCopySlice) { } } +TEST(TensorUtil, DeepCopySliceString) { + Tensor x(DT_STRING, TensorShape({10})); + x.flat().setConstant("hello"); + + // Slice 'x' -- y still refers to the same buffer. + Tensor y = x.Slice(3, 7); + + // Do a deep copy of y, which is a slice. + Tensor z = tensor::DeepCopy(y); + + // Set x to be different. + x.flat().setConstant("goodbye"); + + EXPECT_EQ(TensorShape({10}), x.shape()); + EXPECT_EQ(TensorShape({4}), y.shape()); + EXPECT_EQ(TensorShape({4}), z.shape()); + EXPECT_EQ(DT_STRING, x.dtype()); + EXPECT_EQ(DT_STRING, y.dtype()); + EXPECT_EQ(DT_STRING, z.dtype()); + + // x and y should now all be 'goodbye', but z should be 'hello'. + for (int i = 0; i < 10; ++i) { + EXPECT_EQ("goodbye", x.flat()(i)); + } + for (int i = 0; i < 4; ++i) { + EXPECT_EQ("goodbye", y.unaligned_flat()(i)); + EXPECT_EQ("hello", z.flat()(i)); + } +} + +TEST(TensorUtil, DeepCopySliceVariant) { + Tensor x(DT_VARIANT, TensorShape({10})); + x.flat().setConstant(Tensor(42.0f)); + + // Slice 'x' -- y still refers to the same buffer. + Tensor y = x.Slice(3, 7); + + // Do a deep copy of y, which is a slice. + Tensor z = tensor::DeepCopy(y); + + // Set x to be different. 
+ x.flat().setConstant(Tensor("foo")); + + EXPECT_EQ(TensorShape({10}), x.shape()); + EXPECT_EQ(TensorShape({4}), y.shape()); + EXPECT_EQ(TensorShape({4}), z.shape()); + EXPECT_EQ(DT_VARIANT, x.dtype()); + EXPECT_EQ(DT_VARIANT, y.dtype()); + EXPECT_EQ(DT_VARIANT, z.dtype()); + + // Each element of x and y should now be a DT_STRING Tensor containing "foo", + // but each element of z should be a DT_FLOAT tensor containing 42.0. + for (int i = 0; i < 10; ++i) { + EXPECT_EQ("foo", x.flat()(i).get()->scalar()()); + } + for (int i = 0; i < 4; ++i) { + EXPECT_EQ("foo", + y.unaligned_flat()(i).get()->scalar()()); + EXPECT_EQ(42.0, z.flat()(i).get()->scalar()()); + } +} + TEST(TensorUtil, Concat) { std::vector sizes = {1, 4, 5}; std::vector to_concat; -- GitLab From ea474984f1b179185c3c75dcf6fb5908869d7755 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 10:35:06 -0800 Subject: [PATCH 048/351] Adding back no_pip and nopip tags. Removing gpu related tags. PiperOrigin-RevId: 233782226 --- tensorflow/tools/ci_build/builds/pip_new.sh | 22 ++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/pip_new.sh b/tensorflow/tools/ci_build/builds/pip_new.sh index 3ee3e79463..cb3853c5df 100755 --- a/tensorflow/tools/ci_build/builds/pip_new.sh +++ b/tensorflow/tools/ci_build/builds/pip_new.sh @@ -17,7 +17,7 @@ # the package. # # Usage: -# pip.sh +# pip_new.sh # # Required step(s): # Run configure.py prior to running this script. @@ -34,7 +34,7 @@ # --build_tests_only --test_output=errors" # TF_TEST_FILTER_TAGS: Filtering tags for bazel tests. More specifically, # input tags for `--test_filter_tags` flag. -# e.g. TF_TEST_FILTER_TAGS="no-pip,-nomac,no_oss" +# e.g. TF_TEST_FILTER_TAGS="no_pip,-nomac,no_oss" # TF_TEST_TARGETS: Bazel test targets. # e.g. TF_TEST_TARGETS="//tensorflow/contrib/... \ # //tensorflow/... \ @@ -45,7 +45,7 @@ # test_pip_virtualenv_oss_serial" # IS_NIGHTLY: Nightly run flag. 
# e.g. IS_NIGHTLY=1 # nightly runs -# IS_NIGHTLY=0 # non-nightly runs +# e.g. IS_NIGHTLY=0 # non-nightly runs # TF_PROJECT_NAME: Name of the project. This string will be pass onto # the wheel file name. For nightly builds, it will be # overwritten to 'tf_nightly'. For gpu builds, '_gpu' @@ -140,16 +140,13 @@ update_bazel_flags() { update_test_filter_tags() { # Add test filter tags + # This script is for validating built PIP packages. Add pip tags. + add_test_filter_tag -no_pip -nopip # MacOS filter tags if [[ ${OS_TYPE} == "macos" ]]; then remove_test_filter_tag nomac no_mac add_test_filter_tag -nomac -no_mac fi - # GPU or CPU tags - if [[ "${CONTAINER_TYPE}" == "gpu" ]]; then - remove_test_filter_tag no_gpu -requires-gpu - add_test_filter_tag requires-gpu - fi echo "Final test filter tags: ${BAZEL_TEST_FILTER_TAGS}" } @@ -251,7 +248,13 @@ INSTALL_EXTRA_PIP_PACKAGES=${TF_BUILD_INSTALL_EXTRA_PIP_PACKAGES} # Build TF PIP Package ########################################################################### -# First, check that global variables are properly set. +# First remove any already existing binaries for a clean start and test. +if [[ -d ${PIP_TEST_ROOT} ]]; then + echo "Test root directory ${PIP_TEST_ROOT} already exists. Deleting it." + sudo rm -rf ${PIP_TEST_ROOT} +fi + +# Check that global variables are properly set. check_global_vars # Check if in a virtualenv and exit if yes. @@ -288,6 +291,7 @@ test_pip_virtualenv_clean() { # activate virtual environment and install tensorflow with PIP. create_activate_virtualenv --clean "${CLEAN_VENV_DIR}" + # Install TF with pip install_tensorflow_pip "${WHL_PATH}" # cd to a temporary directory to avoid picking up Python files in the source -- GitLab From 8db35dbd8916efc6bc0766f0ccf0573cb49d2131 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Wed, 13 Feb 2019 10:38:39 -0800 Subject: [PATCH 049/351] [Grappler] Don't exclude repeating trailing denormal numbers when packing tensors. 
PiperOrigin-RevId: 233783060 --- .../grappler/optimizers/constant_folding.cc | 19 ++++++++++- .../optimizers/constant_folding_test.cc | 33 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 5d36d26f0d..e626943ee6 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -17,6 +17,8 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include + #include "absl/strings/string_view.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/attr_value.pb.h" @@ -168,6 +170,21 @@ bool HasTPUAttributes(const NodeDef& node) { return false; } +template +bool IsDenormal(T x) { + return false; +} + +template <> +bool IsDenormal(float x) { + return !std::isnormal(x); +} + +template <> +bool IsDenormal(double x) { + return !std::isnormal(x); +} + } // namespace ConstantFolding::ConstantFolding(RewriterConfig::Toggle opt_level, @@ -1018,7 +1035,7 @@ Status ConstantFolding::CreateNodeDef(const string& name, int64 last_index = 0; \ for (int64 i = 0; i < tensor->NumElements(); ++i) { \ TYPE cur = *val_ptr++; \ - if (cur != last) { \ + if (cur != last || IsDenormal(cur)) { \ last = cur; \ last_index = i; \ } \ diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 3a159707be..76e149d0ae 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -3683,6 +3683,39 @@ TEST_F(ConstantFoldingTest, MaterializeConstantValuedNode) { } } +TEST_F(ConstantFoldingTest, BitcastDenormalFloats) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + + Tensor x_t(DT_INT64, TensorShape({2, 2})); + x_t.flat()(0) = 
9223372036854775807L; + x_t.flat()(1) = 1L; + x_t.flat()(2) = 9223372036854775807L; + x_t.flat()(3) = 1L; + Output x = ops::Const(scope.WithOpName("x"), x_t); + Output y = ops::Bitcast(scope.WithOpName("y"), x, DT_FLOAT); + Output z = ops::Bitcast(scope.WithOpName("z"), y, DT_INT64); + + GrapplerItem item; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + item.fetch = {"z"}; + auto tensors_expected = EvaluateNodes(item.graph, item.fetch, {}); + + ConstantFolding optimizer(/*cpu_device=*/nullptr); + GraphDef output; + Status status = optimizer.Optimize(/*cluster=*/nullptr, item, &output); + TF_EXPECT_OK(status); + + ASSERT_EQ(output.node_size(), 1); + const NodeDef& node = output.node(0); + EXPECT_EQ(node.name(), "z"); + EXPECT_EQ(node.op(), "Const"); + + auto tensors = EvaluateNodes(output, item.fetch, {}); + ASSERT_EQ(tensors.size(), 1); + ASSERT_EQ(tensors_expected.size(), 1); + test::ExpectTensorEqual(tensors[0], tensors_expected[0]); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From cf4829435cbed91853a850e17b531f212e374fde Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Wed, 13 Feb 2019 10:45:31 -0800 Subject: [PATCH 050/351] Distribution Strategies: Unit test for regularizer loss scaling. 
PiperOrigin-RevId: 233784687 --- .../contrib/distribute/python/keras_test.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py index 2eca1d1877..f298b94937 100644 --- a/tensorflow/contrib/distribute/python/keras_test.py +++ b/tensorflow/contrib/distribute/python/keras_test.py @@ -34,6 +34,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine import distributed_training_utils from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_keras +from tensorflow.python.ops import array_ops from tensorflow.python.ops.parsing_ops import gen_parsing_ops from tensorflow.python.platform import gfile from tensorflow.python.summary.writer import writer_cache @@ -1184,5 +1185,53 @@ class TestDistributionStrategyWithDatasets(test.TestCase, atol=1e-4, rtol=1e-4) +class TestRegularizerLoss(test.TestCase, parameterized.TestCase): + class IdentityRegularizer(keras.regularizers.Regularizer): + + def __call__(self, x): + return array_ops.identity(x) + + class AddLayer(keras.layers.Layer): + + def build(self, _): + self.v = self.add_weight( + 'v', (), initializer='ones', + regularizer=TestRegularizerLoss.IdentityRegularizer()) + + def call(self, inputs): + return inputs + self.v + + @staticmethod + def loss_fn(_, y_pred): + return y_pred + + @combinations.generate(all_strategy_combinations_minus_default()) + def test_regularizer_loss(self, distribution): + batch_size = 2 + if not distributed_training_utils.global_batch_size_supported(distribution): + batch_size //= distribution.num_replicas_in_sync + + # Given an input x, which is always 1, and variable v, this model computes + # Loss=x+v+regularizer_loss, where regularizer_loss=v and the variable is + # initialized to 1. Therefore, this model computes Loss=1+2v, and so the + # gradient dLoss/dv = 2. 
This gradient of 2 is averaged over all examples + # in a batch and then multiplied by the learning rate of 1. As a result, + # the model update for one batch should subtract 2 from v, resulting in v + # being -1. If the regularizer loss is not scaled correctly by number of + # replicas, the variable value will be incorrect when number of replicas + # >1. For e.g. it will be -2 if num replicas = 2. + with distribution.scope(): + x = keras.layers.Input(shape=(), batch_size=batch_size) + y = TestRegularizerLoss.AddLayer()(x) + model = keras.models.Model(inputs=x, outputs=y) + opt = gradient_descent_keras.SGD(1.) + model.compile(opt, loss=TestRegularizerLoss.loss_fn) + model.fit(x=np.array([1., 1.], dtype=np.float32), + y=np.array([1., 1.], dtype=np.float32), + batch_size=batch_size) + v = model.get_weights()[0] + self.assertEqual(-1.0, v) + + if __name__ == '__main__': test.main() -- GitLab From d97ce58d0da919ac6497ca267182f8659625392c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 10:46:14 -0800 Subject: [PATCH 051/351] Remove unused variable. 
PiperOrigin-RevId: 233784859 --- tensorflow/python/ops/nn_grad.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index a3d3c7b4ef..9914a2f585 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -309,7 +309,6 @@ def _BiasAddGradGrad(op, received_grad): data_format = None shape = array_ops.shape(op.inputs[0]) - rank = array_ops.rank(op.inputs[0]) bias_shape = array_ops.shape(received_grad) if data_format == b"NCHW": -- GitLab From a2e92ad4fed370efe288dc318998f0b2c572c4f2 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Wed, 13 Feb 2019 11:53:55 -0800 Subject: [PATCH 052/351] Fix comment and rename ToVector --- .../tf2tensorrt/convert/convert_nodes.cc | 17 ++++++++--------- .../tf2tensorrt/convert/convert_nodes.h | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 17a9581bc3..b4cb70a556 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2363,8 +2363,8 @@ tensorflow::Status ConvertSlice(OpConverterParams* params) { const auto& node_def = params->node_def; TF_RETURN_IF_ERROR(CheckInputsWeights( *params, {{"input", false}, {"begin", true}, {"size", true}})); - std::vector begin = inputs.at(1).weights().CopyToVector(); - std::vector size = inputs.at(2).weights().CopyToVector(); + std::vector begin = inputs.at(1).weights().ToVector(); + std::vector size = inputs.at(2).weights().ToVector(); // Get input dims. nvinfer1::Dims dims = inputs.at(0).GetTrtDims(); std::vector input_dims(dims.d, dims.d + dims.nbDims); @@ -2413,9 +2413,9 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { // Add batch dimension so that indexes line up properly. 
input_dims.insert(input_dims.begin(), inputs.at(0).batch_size()); // Get begin and end bounds per axis. - std::vector begin = inputs.at(1).weights().CopyToVector(); - std::vector end = inputs.at(2).weights().CopyToVector(); - std::vector stride = inputs.at(3).weights().CopyToVector(); + std::vector begin = inputs.at(1).weights().ToVector(); + std::vector end = inputs.at(2).weights().ToVector(); + std::vector stride = inputs.at(3).weights().ToVector(); if (!AllLengthsEqual({input_dims, begin, end, stride})) { return tensorflow::errors::InvalidArgument( "Length of begin, end, and stride arguments must equal rank of input " @@ -2437,10 +2437,9 @@ tensorflow::Status ConvertStridedSlice(OpConverterParams* params) { // Check that batch dimension is unmodified. const bool begin_is_modified = !(begin_mask & 1) && begin[0] != 0; const bool stride_is_modified = stride[0] != 1; - // If the batch size is -1 and the - // If end mask is not set, we can only know if the batch dimension is - // unmodified when the batch size is defined. When the batch size is - // undefined, we don't convert to be safe. + // If the batch size is -1 and the end mask is not set, we can only know if + // the batch dimension is unmodified when the batch size is defined. When the + // batch size is undefined, we don't convert to be safe. 
const bool batch_size_is_defined = input_dims[0] > 0; const bool end_is_modified = !(end_mask & 1) && (!batch_size_is_defined || diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index bcbad579f7..45edafd2be 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -196,7 +196,7 @@ class TRT_ShapedWeights { } template - std::vector CopyToVector() const { + std::vector ToVector() const { auto span = GetSpan(); return std::vector(span.data(), span.data() + span.size()); } -- GitLab From 0df5d678671baade46e437cd31828936fbe138bb Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 13 Feb 2019 11:07:00 -0800 Subject: [PATCH 053/351] Disable broken classifier_metrics_test PiperOrigin-RevId: 233789823 --- tensorflow/contrib/gan/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index db0868fb2c..386e4cf69b 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -377,7 +377,10 @@ py_test( name = "classifier_metrics_test", srcs = ["python/eval/python/classifier_metrics_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows + tags = [ + "no_pip", + "no_windows", + ], deps = [ ":classifier_metrics", "//tensorflow/core:protos_all_py", -- GitLab From 62be03c88e3af45edc3f6f0461089a9ba9df092e Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 13 Feb 2019 11:16:11 -0800 Subject: [PATCH 054/351] Add direct dependencies to op libraries. 
PiperOrigin-RevId: 233791917 --- tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD | 9 +++++++++ tensorflow/contrib/util/BUILD | 2 ++ 2 files changed, 11 insertions(+) diff --git a/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD b/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD index d319aa7986..92016e6a83 100644 --- a/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD +++ b/tensorflow/contrib/hvx/hvx_ops_support_checker/BUILD @@ -19,16 +19,25 @@ tf_cc_binary( "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:candidate_sampling_ops_op_lib", "//tensorflow/core:control_flow_ops_op_lib", + "//tensorflow/core:data_flow_ops_op_lib", "//tensorflow/core:framework_internal", "//tensorflow/core:functional_ops_op_lib", + "//tensorflow/core:io_ops_op_lib", "//tensorflow/core:lib", "//tensorflow/core:list_ops_op_lib", + "//tensorflow/core:logging_ops_op_lib", + "//tensorflow/core:lookup_ops_op_lib", "//tensorflow/core:manip_ops_op_lib", "//tensorflow/core:math_ops_op_lib", "//tensorflow/core:nn_ops_op_lib", + "//tensorflow/core:no_op_op_lib", + "//tensorflow/core:parsing_ops_op_lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:random_ops_op_lib", "//tensorflow/core:remote_fused_graph_ops_op_lib", + "//tensorflow/core:sendrecv_ops_op_lib", + "//tensorflow/core:sparse_ops_op_lib", + "//tensorflow/core:state_ops_op_lib", "//tensorflow/core:string_ops_op_lib", "//tensorflow/core:training_ops_op_lib", "//tensorflow/core:user_ops_op_lib", diff --git a/tensorflow/contrib/util/BUILD b/tensorflow/contrib/util/BUILD index 07dbd5ca8d..ada08f95ae 100644 --- a/tensorflow/contrib/util/BUILD +++ b/tensorflow/contrib/util/BUILD @@ -22,7 +22,9 @@ cc_library( "//tensorflow/core:functional_ops_op_lib", "//tensorflow/core:lib", "//tensorflow/core:nn_ops_op_lib", + "//tensorflow/core:no_op_op_lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core:sendrecv_ops_op_lib", "//tensorflow/core:tensorflow", "//tensorflow/core/kernels:immutable_constant_op", ], -- 
GitLab From cbc72d5c35e32c85b1d03f9c2d33f2cbb1c71a1c Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Wed, 13 Feb 2019 12:40:44 -0800 Subject: [PATCH 055/351] Add comment about no-op slice and unit tests --- tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index b4cb70a556..4f9316e63f 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -2250,7 +2250,12 @@ tensorflow::Status ConvertStridedSliceHelper(OpConverterParams* params, } } if (pad_dims.empty()) { - // No dimensions are changed. Create a no-op layer so tests don't break. + // No dimensions are changed, so this is a no-op. We could just return the + // input without creating a new layer. TRT will crash if an empty engine + // with no layers is attempted to be created, so we add a no-op shuffle to + // prevent our unit tests from breaking. + // TODO(tmorris): Allow empty engines in the unit tests and return the input + // as output here. if (params->validation_only) return Status::OK(); nvinfer1::IShuffleLayer* layer = params->converter->network()->addShuffle( *const_cast(input.tensor())); -- GitLab From e09fa816e71fa38086dee3fad2b98eb40f2abf12 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 11:27:53 -0800 Subject: [PATCH 056/351] [TF:XLA] Enable RNN test on XLA and change floating point comparison Change comparisons that expected floating point results to be equal, to just near because it relies on the same CUDNN algorithm being chosen. 
PiperOrigin-RevId: 233794486 --- tensorflow/python/kernel_tests/BUILD | 1 + .../python/kernel_tests/rnn_cell_test.py | 40 +++++++++---------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 75a8fa2a68..e142bbb330 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2945,6 +2945,7 @@ cuda_py_test( "//tensorflow/python:variables", ], shard_count = 10, + xla_enable_strict_auto_jit = True, ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/rnn_cell_test.py b/tensorflow/python/kernel_tests/rnn_cell_test.py index 8492f10245..b8dfbfc942 100644 --- a/tensorflow/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/python/kernel_tests/rnn_cell_test.py @@ -1284,12 +1284,12 @@ class LSTMTest(test.TestCase): self.assertEqual(len(values_static), len(values_dynamic)) for (value_static, value_dynamic) in zip(values_static, values_dynamic): - self.assertAllEqual(value_static, value_dynamic) - self.assertAllEqual(state_value_static, state_value_dynamic) + self.assertAllClose(value_static, value_dynamic) + self.assertAllClose(state_value_static, state_value_dynamic) if in_graph_mode: - self.assertAllEqual(static_grad_values, dynamic_grad_values) + self.assertAllClose(static_grad_values, dynamic_grad_values) self.assertEqual( len(static_individual_grad_values), @@ -1301,14 +1301,14 @@ class LSTMTest(test.TestCase): for i, (a, b) in enumerate( zip(static_individual_grad_values, dynamic_individual_grad_values)): tf_logging.info("Comparing individual gradients iteration %d" % i) - self.assertAllEqual(a, b) + self.assertAllClose(a, b) for i, (a, b) in enumerate( zip(static_individual_var_grad_values, dynamic_individual_var_grad_values)): tf_logging.info( "Comparing individual variable gradients iteration %d" % i) - self.assertAllEqual(a, b) + self.assertAllClose(a, b) @test_util.run_in_graph_and_eager_modes def 
testDynamicEquivalentToStaticRNN(self): @@ -1383,27 +1383,27 @@ class BidirectionalRNNTest(test.TestCase): # # First sequence in batch is length=2 # Check that the time=0 forward output is equal to time=1 backward output - self.assertEqual(out[0][0][0], out[1][0][3]) - self.assertEqual(out[0][0][1], out[1][0][4]) - self.assertEqual(out[0][0][2], out[1][0][5]) + self.assertAllClose(out[0][0][0], out[1][0][3]) + self.assertAllClose(out[0][0][1], out[1][0][4]) + self.assertAllClose(out[0][0][2], out[1][0][5]) # Check that the time=1 forward output is equal to time=0 backward output - self.assertEqual(out[1][0][0], out[0][0][3]) - self.assertEqual(out[1][0][1], out[0][0][4]) - self.assertEqual(out[1][0][2], out[0][0][5]) + self.assertAllClose(out[1][0][0], out[0][0][3]) + self.assertAllClose(out[1][0][1], out[0][0][4]) + self.assertAllClose(out[1][0][2], out[0][0][5]) # Second sequence in batch is length=3 # Check that the time=0 forward output is equal to time=2 backward output - self.assertEqual(out[0][1][0], out[2][1][3]) - self.assertEqual(out[0][1][1], out[2][1][4]) - self.assertEqual(out[0][1][2], out[2][1][5]) + self.assertAllClose(out[0][1][0], out[2][1][3]) + self.assertAllClose(out[0][1][1], out[2][1][4]) + self.assertAllClose(out[0][1][2], out[2][1][5]) # Check that the time=1 forward output is equal to time=1 backward output - self.assertEqual(out[1][1][0], out[1][1][3]) - self.assertEqual(out[1][1][1], out[1][1][4]) - self.assertEqual(out[1][1][2], out[1][1][5]) + self.assertAllClose(out[1][1][0], out[1][1][3]) + self.assertAllClose(out[1][1][1], out[1][1][4]) + self.assertAllClose(out[1][1][2], out[1][1][5]) # Check that the time=2 forward output is equal to time=0 backward output - self.assertEqual(out[2][1][0], out[0][1][3]) - self.assertEqual(out[2][1][1], out[0][1][4]) - self.assertEqual(out[2][1][2], out[0][1][5]) + self.assertAllClose(out[2][1][0], out[0][1][3]) + self.assertAllClose(out[2][1][1], out[0][1][4]) + self.assertAllClose(out[2][1][2], 
out[0][1][5]) # Via the reasoning above, the forward and backward final state should be # exactly the same self.assertAllClose(s_fw, s_bw) -- GitLab From 6f4b327252f1941270d06e4855197be914793e73 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 11:31:10 -0800 Subject: [PATCH 057/351] Add unit tests to check behavior of using Keras subclassed-models with Distribution Strategy. PiperOrigin-RevId: 233795267 --- .../contrib/distribute/python/keras_test.py | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py index f298b94937..cc9cee31be 100644 --- a/tensorflow/contrib/distribute/python/keras_test.py +++ b/tensorflow/contrib/distribute/python/keras_test.py @@ -69,6 +69,20 @@ def simple_functional_model(): return model +def simple_subclassed_model(num_labels=_NUM_CLASS): + + class _SimpleMLP(keras.Model): + + def __init__(self, num_labels): + super(_SimpleMLP, self).__init__() + self.dense = keras.layers.Dense(num_labels) + + def call(self, inputs): + return self.dense(inputs) + + return _SimpleMLP(num_labels) + + def simple_multi_inputs_multi_outputs_model(): input_a = keras.layers.Input(shape=(16,), name='input_a') input_b = keras.layers.Input(shape=(16,), name='input_b') @@ -1233,5 +1247,61 @@ class TestRegularizerLoss(test.TestCase, parameterized.TestCase): self.assertEqual(-1.0, v) +class TestDistributionStrategyWithKerasModels(test.TestCase, + parameterized.TestCase): + + @combinations.generate(all_strategy_combinations()) + def test_distribution_strategy_on_sequential_model(self, distribution): + with distribution.scope(): + model = simple_sequential_model() + optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + model.compile(optimizer, loss) + + inputs = np.zeros((20, 10), np.float32) + targets = np.zeros((20, 2), np.float32) + + model.fit(inputs, targets, epochs=1, steps_per_epoch=2) + 
model.predict(inputs, steps=1) + model.evaluate(inputs, targets, steps=1) + + @combinations.generate(all_strategy_combinations()) + def test_distribution_strategy_on_functional_model(self, distribution): + with distribution.scope(): + model = get_model() + optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + model.compile(optimizer, loss) + + inputs = np.zeros((64, 3), dtype=np.float32) + targets = np.zeros((64, 4), dtype=np.float32) + + model.fit(inputs, targets, epochs=1, steps_per_epoch=2) + model.predict(inputs, steps=1) + model.evaluate(inputs, targets, steps=1) + + # TODO(b/124377929): Remove error assertions once subclassed models + # are supported in DistributedStrategy. + @combinations.generate(all_strategy_combinations_minus_default()) + def test_distribution_strategy_on_subclassed_model(self, distribution): + with distribution.scope(): + model = simple_subclassed_model() + optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) + loss = 'mse' + model.compile(optimizer, loss) + + inputs = np.zeros((64, 3), dtype=np.float32) + targets = np.zeros((64, 2), dtype=np.float32) + + with self.assertRaisesRegexp(AttributeError, 'has no attribute'): + model.fit(inputs, targets, epochs=1, steps_per_epoch=2) + + with self.assertRaisesRegexp(AttributeError, 'has no attribute'): + model.predict(inputs, steps=1) + + with self.assertRaisesRegexp(AttributeError, 'has no attribute'): + model.evaluate(inputs, targets, steps=1) + + if __name__ == '__main__': test.main() -- GitLab From e02e43f65867e8d5e0a5f90b893c41c076ed848f Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 13 Feb 2019 12:20:55 -0800 Subject: [PATCH 058/351] Remove difference in output types between eager and graph while_loop. 
PiperOrigin-RevId: 233805317 --- tensorflow/python/kernel_tests/control_flow_ops_py_test.py | 4 ++++ tensorflow/python/ops/control_flow_ops.py | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 7b14145bd4..655eac7a15 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -1785,6 +1785,8 @@ class ControlFlowTest(test.TestCase): @test_util.disable_control_flow_v2("b/116328420 (RaggedTensor)") def testWhileShapeInferenceRaggedTensor(self): + if context.executing_eagerly(): + self.skipTest("b/116328420") i = constant_op.constant(0) x = ragged_factory_ops.constant([[1, 2], [3], [4, 5, 6]]) c = lambda i, _: i < 10 @@ -1828,6 +1830,8 @@ class ControlFlowTest(test.TestCase): @test_util.disable_control_flow_v2("b/116328420 (RaggedTensor)") def testWhileShapeInferenceRaggedTensorRaggedRank2(self): + if context.executing_eagerly(): + self.skipTest("b/116328420") i = constant_op.constant(0) x = ragged_factory_ops.constant([[[1, 2], [3], [4, 5, 6]], [[], [8, 9, 10]]]) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index e0b83c490c..9726cd9d9e 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -3443,6 +3443,12 @@ def while_loop(cond, if try_to_pack and not isinstance(loop_vars, (list, _basetuple)): packed = True loop_vars = (loop_vars,) + + def convert(x): + if isinstance(x, tensor_array_ops.TensorArray): + return x + return ops.convert_to_tensor(x) + loop_vars = nest.map_structure(convert, loop_vars) if maximum_iterations is not None: return loop_vars[1] else: -- GitLab From 142bd9fc372e0f33af738ce01cf38c8757176465 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 12:26:26 -0800 Subject: [PATCH 059/351] Disable flaky test. 
PiperOrigin-RevId: 233806315 --- tensorflow/python/kernel_tests/random/random_grad_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/random/random_grad_test.py b/tensorflow/python/kernel_tests/random/random_grad_test.py index aac6eeac06..6c125c8b9c 100644 --- a/tensorflow/python/kernel_tests/random/random_grad_test.py +++ b/tensorflow/python/kernel_tests/random/random_grad_test.py @@ -218,7 +218,8 @@ class RandomGammaGradTest(test.TestCase): self.assertAllClose(dsample_dalpha_val, [1.0] * 3, atol=1e-1, rtol=1e-1) @test_util.run_deprecated_v1 - def testQuadraticLoss(self): + # TODO(b/124379202): Re-enable this test when not flaky. + def DISABLED_testQuadraticLoss(self): """Statistical test for the gradient. The equation (5) of https://arxiv.org/abs/1805.08498 says -- GitLab From 71e62242b58e03bcc566bce6f46374dec0f00dc4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 12:31:46 -0800 Subject: [PATCH 060/351] Disable flaky test. PiperOrigin-RevId: 233807324 --- tensorflow/python/debug/lib/source_remote_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/debug/lib/source_remote_test.py b/tensorflow/python/debug/lib/source_remote_test.py index 29add425e9..fe0323692d 100644 --- a/tensorflow/python/debug/lib/source_remote_test.py +++ b/tensorflow/python/debug/lib/source_remote_test.py @@ -201,7 +201,8 @@ class SendTracebacksTest(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): self._server.query_source_file_line(tf_trace_file_path, 0) - def testSendEagerTracebacksToSingleDebugServer(self): + # TODO(b/124381153): Re-enable this test when not flaky. 
+ def DISABLED_testSendEagerTracebacksToSingleDebugServer(self): this_func_name = "testSendEagerTracebacksToSingleDebugServer" send_traceback = traceback.extract_stack() send_lineno = line_number_above() -- GitLab From ad78d08bd1d03238d434d3d214a1de56f22d7601 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 12:35:06 -0800 Subject: [PATCH 061/351] [TF:XLA] Mark a test as not running on XLA due to different flows. The test is not run because it is to test a part of grappler not used with XLA:GPU. This change is a no-op. It makes explicit a default value and adds comments. PiperOrigin-RevId: 233807945 --- tensorflow/python/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index d9d800f754..6f263096f9 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -5797,6 +5797,8 @@ cuda_py_test( "grappler", "no_pip", # tf_optimizer is not available in pip. ], + # This test will not run on XLA because it primarily tests the TF Classic flow. + xla_enable_strict_auto_jit = False, ) py_library( @@ -5937,6 +5939,8 @@ cuda_py_test( tags = [ "grappler", ], + # This test will not run on XLA because it primarily tests the TF Classic flow. + xla_enable_strict_auto_jit = False, ) py_library( -- GitLab From 5c57cdc12b6de258cad268072a32e706ccc11e11 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Wed, 13 Feb 2019 13:00:33 -0800 Subject: [PATCH 062/351] Refactor (Extract methods) and improve model.compile() readability. 
PiperOrigin-RevId: 233812403 --- tensorflow/python/keras/engine/training.py | 271 ++++++++---------- .../python/keras/engine/training_test.py | 128 ++++++++- .../python/keras/engine/training_utils.py | 89 ++++++ 3 files changed, 334 insertions(+), 154 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 37f6f79a14..5901f05e20 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -159,11 +159,12 @@ class Model(Network): Arguments: optimizer: String (name of optimizer) or optimizer instance. See `tf.keras.optimizers`. - loss: String (name of objective function) or objective function. - See `tf.losses`. If the model has multiple outputs, you can use a - different loss on each output by passing a dictionary or a list of - losses. The loss value that will be minimized by the model - will then be the sum of all individual losses. + loss: String (name of objective function), objective function or + `tf.losses.Loss` instance. See `tf.losses`. If the model has + multiple outputs, you can use a different loss on each output by + passing a dictionary or a list of losses. The loss value that will + be minimized by the model will then be the sum of all individual + losses. metrics: List of metrics to be evaluated by the model during training and testing. Typically you will use `metrics=['accuracy']`. @@ -288,79 +289,30 @@ class Model(Network): return self._is_compiled = True - # Prepare loss functions. - if isinstance(loss, dict): - for name in loss: - if name not in self.output_names: - raise ValueError( - 'Unknown entry in loss ' - 'dictionary: "' + name + '". ' - 'Only expected the following keys: ' + str(self.output_names)) - loss_functions = [] - for name in self.output_names: - if name not in loss: - logging.warning( - 'Output "' + name + - '" missing from loss dictionary. We assume ' - 'this was done on purpose. 
The fit and evaluate APIs will not be ' - 'expecting any data to be passed to "' + name + '".') - loss_functions.append(training_utils.get_loss_function(loss.get(name))) - elif isinstance(loss, list): - if len(loss) != len(self.outputs): - raise ValueError('When passing a list as loss, ' - 'it should have one entry per model outputs. ' - 'The model has ' + str(len(self.outputs)) + - ' outputs, but you passed loss=' + str(loss)) - loss_functions = [training_utils.get_loss_function(l) for l in loss] - else: - loss_functions = [ - training_utils.get_loss_function(loss) - for _ in range(len(self.outputs)) - ] - self.loss_functions = loss_functions + # Prepare list of loss functions, same size of model outputs. + self.loss_functions = training_utils.prepare_loss_functions( + loss, self.output_names) - skip_target_indices = [] - skip_target_weighing_indices = [] self._feed_outputs = [] self._feed_output_names = [] self._feed_output_shapes = [] self._feed_loss_fns = [] - for i in range(len(loss_functions)): - if loss_functions[i] is None: + # if loss function is None, then this output will be skipped during total + # loss calculation and feed targets preparation. + skip_target_indices = [] + skip_target_weighing_indices = [] + for i, loss_function in enumerate(self.loss_functions): + if loss_function is None: skip_target_indices.append(i) skip_target_weighing_indices.append(i) # Prepare output masks. if not self.run_eagerly: masks = [getattr(x, '_keras_mask', None) for x in self.outputs] - if not isinstance(masks, list): - masks = [masks] - - # Prepare loss weights. - if loss_weights is None: - loss_weights_list = [1. for _ in range(len(self.outputs))] - elif isinstance(loss_weights, dict): - for name in loss_weights: - if name not in self.output_names: - raise ValueError( - 'Unknown entry in loss_weights ' - 'dictionary: "' + name + '". 
' - 'Only expected the following keys: ' + str(self.output_names)) - loss_weights_list = [] - for name in self.output_names: - loss_weights_list.append(loss_weights.get(name, 1.)) - elif isinstance(loss_weights, list): - if len(loss_weights) != len(self.outputs): - raise ValueError( - 'When passing a list as loss_weights, ' - 'it should have one entry per model output. ' - 'The model has ' + str(len(self.outputs)) + - ' outputs, but you passed loss_weights=' + str(loss_weights)) - loss_weights_list = loss_weights - else: - raise TypeError('Could not interpret loss_weights argument: ' + - str(loss_weights) + ' - expected a list of dicts.') - self.loss_weights_list = loss_weights_list + + # Prepare list loss weights, same size of model outputs. + self.loss_weights_list = training_utils.prepare_loss_weights( + self.output_names, loss_weights) # Initialization for Eager mode execution. if self.run_eagerly: @@ -465,91 +417,7 @@ class Model(Network): # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) + # loss_weight_2 * output_2_loss_fn(...) + # layer losses. - total_loss = None - with K.name_scope('loss'): - for i in range(len(self.outputs)): - if i in skip_target_indices: - continue - y_true = self.targets[i] - y_pred = self.outputs[i] - loss_fn = loss_functions[i] - sample_weight = self.sample_weights[i] - mask = masks[i] - loss_weight = loss_weights_list[i] - with K.name_scope(self.output_names[i] + '_loss'): - if mask is not None: - mask = math_ops.cast(mask, y_pred.dtype) - # Update weights with mask. - if sample_weight is None: - sample_weight = mask - else: - # Update dimensions of weights to match with mask if possible. - mask, _, sample_weight = ( - losses_utils.squeeze_or_expand_dimensions( - mask, None, sample_weight)) - sample_weight *= mask - - # Reset reduction on the loss so that we can get the per sample loss - # value. 
We use this to get both the stateless and stateful loss - # values without having to compute the underlying loss function - # twice. - weighted_losses = None - if hasattr(loss_fn, 'reduction'): - current_loss_reduction = loss_fn.reduction - loss_fn.reduction = losses_utils.ReductionV2.NONE - weighted_losses = loss_fn( - y_true, y_pred, sample_weight=sample_weight) - loss_fn.reduction = current_loss_reduction - - # Compute the stateless loss value. - output_loss = losses_utils.reduce_weighted_loss( - weighted_losses, reduction=current_loss_reduction) - else: - # Compute the stateless loss value for a custom loss class. - # Here we assume that the class takes care of loss reduction - # because if this class returns a vector value we cannot - # differentiate between use case where a custom optimizer - # expects a vector loss value vs unreduced per-sample loss value. - output_loss = loss_fn(y_true, y_pred, sample_weight=sample_weight) - - if len(self.outputs) > 1: - # Keep track of the un-aggregated loss result tensor. - output_name = self.output_names[i] + '_loss' - self._compile_metrics_tensors[output_name] = output_loss - - # Keep track of stateful result tensor and function for the loss. - # Compute the stateful loss value. - if weighted_losses is not None: - # TODO(b/120571621): Directly call metric when the bug is fixed. - aggregated_output_loss = self._call_fn_for_each_replica( - self._output_loss_metrics[i], weighted_losses) - else: - # Custom loss class. 
- aggregated_output_loss = self._call_metric_fn( - self._output_loss_metrics[i], y_true, y_pred, sample_weight) - self._compile_stateful_metrics_tensors[ - output_name] = aggregated_output_loss - self._compile_stateful_metric_functions.append( - self._output_loss_metrics[i]) - - if total_loss is None: - total_loss = loss_weight * output_loss - else: - total_loss += loss_weight * output_loss - if total_loss is None: - if not self.losses: - raise ValueError('The model cannot be compiled ' - 'because it has no loss to optimize.') - else: - total_loss = 0. - - # Add regularization penalties and other layer-specific losses. - if self.losses: - total_loss += losses_utils.scale_loss_for_distribution( - math_ops.add_n(self.losses)) - - # Prepare gradient updates and state updates. - self.total_loss = total_loss + self.total_loss = self._prepare_total_loss(skip_target_indices, masks) # Functions for train, test and predict will # be compiled lazily when required. @@ -1744,6 +1612,105 @@ class Model(Network): verbose=verbose, callbacks=callbacks) + def _prepare_total_loss(self, skip_target_indices=None, masks=None): + """Computes total loss from loss functions. + + Arguments: + skip_target_indices: A list of indices of model outputs where loss + function is None. + masks: List of mask values corresponding to each model output. + + Returns: + A list of loss weights of python floats. + + Raises: + TypeError: If model run_eagerly is True. 
+ """ + if self.run_eagerly: + raise TypeError('total loss can not be computed when compiled with ' + 'run_eagerly = True.') + skip_target_indices = skip_target_indices or [] + total_loss = None + with K.name_scope('loss'): + zipped_inputs = zip(self.targets, self.outputs, self.loss_functions, + self.sample_weights, masks, self.loss_weights_list) + for i, (y_true, y_pred, loss_fn, sample_weight, mask, + loss_weight) in enumerate(zipped_inputs): + if i in skip_target_indices: + continue + loss_name = self.output_names[i] + '_loss' + with K.name_scope(loss_name): + if mask is not None: + mask = math_ops.cast(mask, y_pred.dtype) + # Update weights with mask. + if sample_weight is None: + sample_weight = mask + else: + # Update dimensions of weights to match with mask if possible. + mask, _, sample_weight = ( + losses_utils.squeeze_or_expand_dimensions( + mask, None, sample_weight)) + sample_weight *= mask + + # Reset reduction on the loss so that we can get the per sample loss + # value. We use this to get both the stateless and stateful loss + # values without having to compute the underlying loss function + # twice. + weighted_losses = None + if hasattr(loss_fn, 'reduction'): + current_loss_reduction = loss_fn.reduction + loss_fn.reduction = losses_utils.ReductionV2.NONE + weighted_losses = loss_fn( + y_true, y_pred, sample_weight=sample_weight) + loss_fn.reduction = current_loss_reduction + + # Compute the stateless loss value. + output_loss = losses_utils.reduce_weighted_loss( + weighted_losses, reduction=current_loss_reduction) + else: + # Compute the stateless loss value for a custom loss class. + # Here we assume that the class takes care of loss reduction + # because if this class returns a vector value we cannot + # differentiate between use case where a custom optimizer + # expects a vector loss value vs unreduced per-sample loss value. 
+ output_loss = loss_fn(y_true, y_pred, sample_weight=sample_weight) + + if len(self.outputs) > 1: + # Keep track of the un-aggregated loss result tensor. + self._compile_metrics_tensors[loss_name] = output_loss + + # Keep track of stateful result tensor and function for the loss. + # Compute the stateful loss value. + if weighted_losses is not None: + # TODO(b/120571621): Directly call metric when the bug is fixed. + aggregated_output_loss = self._call_fn_for_each_replica( + self._output_loss_metrics[i], weighted_losses) + else: + # Custom loss class. + aggregated_output_loss = self._call_metric_fn( + self._output_loss_metrics[i], y_true, y_pred, sample_weight) + self._compile_stateful_metrics_tensors[ + loss_name] = aggregated_output_loss + self._compile_stateful_metric_functions.append( + self._output_loss_metrics[i]) + + if total_loss is None: + total_loss = loss_weight * output_loss + else: + total_loss += loss_weight * output_loss + if total_loss is None: + if not self.losses: + raise ValueError('The model cannot be compiled ' + 'because it has no loss to optimize.') + else: + total_loss = 0. + + # Add regularization penalties and other layer-specific losses. 
+ if self.losses: + total_loss += losses_utils.scale_loss_for_distribution( + math_ops.add_n(self.losses)) + return total_loss + def _get_callback_model(self): """Returns the Callback Model for this Model.""" diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 72c4a29118..b84b9cece8 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -34,6 +34,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras import losses from tensorflow.python.keras import metrics as metrics_module from tensorflow.python.keras import testing_utils from tensorflow.python.keras.callbacks import Callback @@ -54,6 +55,129 @@ except ImportError: scipy_sparse = None +class CompileTest(keras_parameterized.TestCase): + + def _get_multi_output_model(self): + input_a = keras.layers.Input(shape=(3,), name='input_a') + output_a = keras.layers.Dense(1, name='dense_1')(input_a) + output_b = keras.layers.Dense(1, name='dense_2')(input_a) + return keras.models.Model(input_a, [output_a, output_b]) + + def _do_test_compile_with_model_and_single_loss(self, model, loss): + model.compile(optimizer='adam', loss=loss) + self.assertEqual(model.loss, loss) + + loss = losses.get(loss) + if not isinstance(loss, list): + loss_list = [loss] * len(model.outputs) + + self.assertEqual(len(model.loss_functions), len(loss_list)) + for i in range(len(loss_list)): + self.assertIsInstance(model.loss_functions[i], losses.LossFunctionWrapper) + if not isinstance(loss_list[i], losses.LossFunctionWrapper): + self.assertEqual(model.loss_functions[i].fn, loss_list[i]) + self.assertAllEqual(model.loss_weights_list, [1.] 
* len(loss_list)) + + @keras_parameterized.run_all_keras_modes + @parameterized.named_parameters(('loss_string', 'mse'), + ('loss_function', losses.mean_squared_error), + ('loss_instance', losses.MeanSquaredError())) + def test_compile_with_single_output(self, loss): + model = testing_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=2, input_dim=3) + self._do_test_compile_with_model_and_single_loss(model, loss) + + @keras_parameterized.run_all_keras_modes + @parameterized.named_parameters(('loss_string', 'mse'), + ('loss_function', losses.mean_squared_error), + ('loss_instance', losses.MeanSquaredError())) + def test_compile_with_multi_output(self, loss): + model = self._get_multi_output_model() + self._do_test_compile_with_model_and_single_loss(model, loss) + + @keras_parameterized.run_all_keras_modes + def test_compile_with_multi_output_and_multi_loss(self): + model = self._get_multi_output_model() + # Test loss is a list. + loss = ['mse', 'mae'] + model.compile(optimizer='adam', loss=loss) + self.assertEqual(model.loss_functions[0].fn, losses.mean_squared_error) + self.assertEqual(model.loss_functions[1].fn, losses.mean_absolute_error) + self.assertAllEqual(model.loss_weights_list, [1., 1.]) + + # Test loss is a dict. + loss = {'dense_1': 'mae', 'dense_2': 'mse'} + model.compile(optimizer='adam', loss=loss) + self.assertEqual(model.loss_functions[0].fn, losses.mean_absolute_error) + self.assertEqual(model.loss_functions[1].fn, losses.mean_squared_error) + self.assertAllEqual(model.loss_weights_list, [1., 1.]) + + @keras_parameterized.run_all_keras_modes + def test_compile_with_multi_output_and_loss_weights_list(self): + model = self._get_multi_output_model() + loss_weights = [1., 2.] 
+ model.compile(optimizer='adam', loss='mse', loss_weights=loss_weights) + self.assertAllEqual(model.loss_weights_list, [1., 2.]) + + def test_compile_with_multi_output_and_loss_weights_dict(self): + with context.graph_mode(): + model = self._get_multi_output_model() + loss_weights = {'dense_1': 1., 'dense_2': 2.} + model.compile(optimizer='adam', loss='mse', loss_weights=loss_weights) + self.assertAllEqual(model.loss_weights_list, [1., 2.]) + + input_np = np.random.random((10, 3)) + output_a_np = np.random.random((10, 1)) + output_b_np = np.random.random((10, 1)) + + with self.cached_session() as sess: + sess.run(variables_lib.global_variables_initializer()) + total_loss, y_preds = sess.run( + [model.total_loss, model.outputs], + feed_dict={ + 'input_a:0': input_np, + 'dense_1_target:0': output_a_np, + 'dense_2_target:0': output_b_np + }) + self.assertAllClose( + total_loss, + np.mean( + np.add((output_a_np - y_preds[0])**2, + 2 * (output_b_np - y_preds[1])**2))) + + @keras_parameterized.run_all_keras_modes + def test_compile_with_incorrect_loss_size(self): + model = testing_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=2, input_dim=3) + with self.assertRaisesRegexp(ValueError, 'The model has 1 outputs'): + model.compile(optimizer='adam', loss=['mse', 'mae']) + + @keras_parameterized.run_all_keras_modes + def test_compile_with_incorrect_loss_key(self): + model = testing_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=2, input_dim=3) + with self.assertRaisesRegexp( + ValueError, 'Unknown entry in loss dictionary: unknown_output'): + model.compile(optimizer='adam', loss={'unknown_output': 'mse'}) + + @keras_parameterized.run_all_keras_modes + def test_compile_with_incorrect_loss_weights_size(self): + model = testing_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=2, input_dim=3) + with self.assertRaisesRegexp(ValueError, + 'it should have one entry per model output'): + model.compile(optimizer='adam', loss='mse', 
loss_weights=[1., 2.]) + + @keras_parameterized.run_all_keras_modes + def test_compile_with_incorrect_loss_weights_key(self): + model = testing_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=2, input_dim=3) + with self.assertRaisesRegexp( + ValueError, 'Unknown entry in loss_weights dictionary: unknown_output'): + model.compile( + optimizer='adam', loss='mse', loss_weights={'unknown_output': 1.}) + + class TrainingTest(keras_parameterized.TestCase): @keras_parameterized.run_with_all_model_types(exclude_models='sequential') @@ -885,9 +1009,9 @@ class TestExceptionsAndWarnings(keras_parameterized.TestCase): 'dense_1': metrics_module.CategoricalAccuracy(), }, run_eagerly=testing_utils.should_run_eagerly()) - msg = ('Output "dense_1" missing from loss dictionary. We assume this ' + msg = ('Output dense_1 missing from loss dictionary. We assume this ' 'was done on purpose. The fit and evaluate APIs will not be ' - 'expecting any data to be passed to "dense_1".') + 'expecting any data to be passed to dense_1.') self.assertRegexpMatches(str(mock_log.call_args), msg) diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index cb1ca9e429..24d2c2528f 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -1107,6 +1107,95 @@ def prepare_sample_weights(output_names, sample_weight_mode, return sample_weights, sample_weight_modes +def prepare_loss_functions(loss, output_names): + """Converts loss to a list of loss functions. + + Arguments: + loss: String (name of objective function), objective function or + `tf.losses.Loss` instance. See `tf.losses`. If the model has multiple + outputs, you can use a different loss on each output by passing a + dictionary or a list of losses. The loss value that will be minimized by + the model will then be the sum of all individual losses. + output_names: List of model output names. 
+ + Returns: + A list of loss objective functions. + + Raises: + ValueError: If loss is a dict with keys not in model output names, + or if loss is a list with len not equal to model outputs. + """ + if isinstance(loss, collections.Mapping): + for name in loss: + if name not in output_names: + raise ValueError('Unknown entry in loss dictionary: {}. Only expected ' + 'following keys: {}'.format(name, output_names)) + loss_functions = [] + for name in output_names: + if name not in loss: + logging.warning( + 'Output {0} missing from loss dictionary. We assume ' + 'this was done on purpose. The fit and evaluate APIs will not be ' + 'expecting any data to be passed to {0}.'.format(name)) + loss_functions.append(get_loss_function(loss.get(name, None))) + elif isinstance(loss, six.string_types): + loss_functions = [get_loss_function(loss) for _ in output_names] + elif isinstance(loss, collections.Sequence): + if len(loss) != len(output_names): + raise ValueError('When passing a list as loss, it should have one entry ' + 'per model outputs. The model has {} outputs, but you ' + 'passed loss={}'.format(len(output_names), loss)) + loss_functions = nest.map_structure(get_loss_function, loss) + else: + loss_functions = [get_loss_function(loss) for _ in range(len(output_names))] + + return loss_functions + + +def prepare_loss_weights(output_names, loss_weights=None): + """Converts loss weights to a list of loss weights. + + Arguments: + output_names: List of model output names. + loss_weights: Optional list or dictionary specifying scalar coefficients + (Python floats) to weight the loss contributions of different model + outputs. The loss value that will be minimized by the model will then be + the *weighted sum* of all individual losses, weighted by the + `loss_weights` coefficients. If a list, it is expected to have a 1:1 + mapping to the model's outputs. If a dict, it is expected to map + output names (strings) to scalar coefficients. 
+ + Returns: + A list of loss weights of python floats. + + Raises: + ValueError: If loss weight is a dict with key not in model output names, + or if loss is a list with len not equal to model outputs. + """ + if loss_weights is None: + weights_list = [1.] * len(output_names) + elif isinstance(loss_weights, dict): + for name in loss_weights: + if name not in output_names: + raise ValueError('Unknown entry in loss_weights dictionary: {}. ' + 'Only expected the following keys: {}'.format( + name, output_names)) + weights_list = [loss_weights.get(name, 1.) for name in output_names] + elif isinstance(loss_weights, list): + if len(loss_weights) != len(output_names): + raise ValueError('When passing a list as loss_weights, ' + 'it should have one entry per model output. ' + 'The model has ' + str(len(output_names)) + + ' outputs, but you passed loss_weights=' + + str(loss_weights)) + weights_list = loss_weights + else: + raise TypeError('Could not interpret loss_weights argument: ' + + str(loss_weights) + ' - expected a list of dicts.') + + return weights_list + + # TODO(rohanj): This is a hack to get around not depending on feature_column and # create a cyclical dependency. Figure out a cleaner solution def is_feature_layer(layer): -- GitLab From 613739bcee3282a3fdae81f3ae0f7a59302fe9da Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 13:00:37 -0800 Subject: [PATCH 063/351] [TF:XLA] Enable function tests on XLA:GPU. One test is disabled because it does not fail in the bridge as expected. 
PiperOrigin-RevId: 233812418 --- tensorflow/python/BUILD | 5 +++-- tensorflow/python/framework/function_test.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 6f263096f9..542a8fd55e 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1310,8 +1310,8 @@ py_library( ], ) -cuda_py_tests( - name = "framework_function_test", +cuda_py_test( + name = "function_test", size = "medium", srcs = ["framework/function_test.py"], additional_deps = [ @@ -1341,6 +1341,7 @@ cuda_py_tests( "noasan", "optonly", ], + xla_enable_strict_auto_jit = True, ) tf_py_test( diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 7543376bcf..cd623223e3 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -284,6 +284,7 @@ class FunctionTest(test.TestCase): out, = sess.run(dlogits, {logits: x, labels: y}) self.assertAllClose(out, np.exp(prob - y)) + @test_util.disable_xla("b/124286351") # No error is raised def testCustomGradientError(self): dtype = dtypes.float32 -- GitLab From 8c92769a36487955686f00d277c5cedea0d70807 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 13 Feb 2019 13:01:44 -0800 Subject: [PATCH 064/351] In TensorArray v2, pass element_shape inferred from write calls to TensorListStack and TensorListGather. 
PiperOrigin-RevId: 233812594 --- tensorflow/python/kernel_tests/map_fn_test.py | 10 +++++----- tensorflow/python/ops/list_ops.py | 16 ++++++++++++---- tensorflow/python/ops/tensor_array_ops.py | 13 ++++++++++++- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/kernel_tests/map_fn_test.py b/tensorflow/python/kernel_tests/map_fn_test.py index 41d99ea379..d2b1d433c7 100644 --- a/tensorflow/python/kernel_tests/map_fn_test.py +++ b/tensorflow/python/kernel_tests/map_fn_test.py @@ -197,17 +197,17 @@ class MapFnTest(test.TestCase): y = map_fn.map_fn(lambda e: e, x) self.assertIs(None, y.get_shape().dims) - @test_util.disable_control_flow_v2("b/119323354") - @test_util.run_in_graph_and_eager_modes + # TODO(b/124383826): this test fails in eager: the iterable is of length 0 + # so the body of the while loop never executes @test_util.run_v1_only("b/120545219") def testMapEmptyScalar(self): - map_return = map_fn.map_fn(lambda x: 1, constant_op.constant([])) + map_return = map_fn.map_fn(lambda x: 1, + constant_op.constant([], dtype=dtypes.int32)) self.assertAllEqual([0], map_return.get_shape().dims) self.assertAllEqual([0], self.evaluate(map_return).shape) - # TODO(akshayka): this test fails in eager: the iterable is of length 0 so + # TODO(b/124383826): this test fails in eager: the iterable is of length 0 # so the body of the while loop never executes - @test_util.disable_control_flow_v2("b/119323354") @test_util.run_v1_only("b/120545219") def testMapEmptyTensor(self): with self.cached_session(): diff --git a/tensorflow/python/ops/list_ops.py b/tensorflow/python/ops/list_ops.py index 87409eb2ac..5cb64035c0 100644 --- a/tensorflow/python/ops/list_ops.py +++ b/tensorflow/python/ops/list_ops.py @@ -89,11 +89,15 @@ def tensor_list_pop_back(input_handle, element_dtype, name=None): name=name) -def tensor_list_gather(input_handle, indices, element_dtype, name=None): +def tensor_list_gather(input_handle, + indices, + element_dtype, +
element_shape=None, + name=None): return gen_list_ops.tensor_list_gather( input_handle=input_handle, indices=indices, - element_shape=-1, + element_shape=_build_element_shape(element_shape), element_dtype=element_dtype, name=name) @@ -107,10 +111,14 @@ def tensor_list_scatter(tensor, indices, element_shape, name=None): name=name) -def tensor_list_stack(input_handle, element_dtype, num_elements=-1, name=None): +def tensor_list_stack(input_handle, + element_dtype, + num_elements=-1, + element_shape=None, + name=None): return gen_list_ops.tensor_list_stack( input_handle=input_handle, - element_shape=-1, + element_shape=_build_element_shape(element_shape), element_dtype=element_dtype, num_elements=num_elements, name=name) diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py index 1a11c332ea..96dc60aa82 100644 --- a/tensorflow/python/ops/tensor_array_ops.py +++ b/tensorflow/python/ops/tensor_array_ops.py @@ -575,18 +575,29 @@ class _GraphTensorArrayV2(object): def stack(self, name=None): """See TensorArray.""" with ops.name_scope(name, "TensorArrayV2Stack", [self._flow]): + if self._element_shape: + element_shape = self._element_shape[0] + else: + element_shape = tensor_shape.TensorShape(None) value = list_ops.tensor_list_stack( - input_handle=self._flow, element_dtype=self._dtype) + input_handle=self._flow, + element_dtype=self._dtype, + element_shape=element_shape) if self._element_shape and self._element_shape[0].dims is not None: value.set_shape([None] + self._element_shape[0].dims) return value def gather(self, indices, name=None): """See TensorArray.""" + if self._element_shape: + element_shape = self._element_shape[0] + else: + element_shape = tensor_shape.TensorShape(None) value = list_ops.tensor_list_gather( input_handle=self._flow, indices=indices, element_dtype=self._dtype, + element_shape=element_shape, name=name) if self._element_shape and self._element_shape[0].dims is not None: value.set_shape([None] + 
self._element_shape[0].dims) -- GitLab From 7df8df33c5d27f38118b5c1c1f9e287e2b94fea0 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Wed, 13 Feb 2019 13:08:05 -0800 Subject: [PATCH 065/351] Add post optimization graph to RunMetadata (when running eager functions) This stores the pre-grappler graph + post-grappler graph + partitioned graphs for each instantiated function. This will be useful to get pre-optimization/post-optimization graphs for displaying within tensorboard. PiperOrigin-RevId: 233813975 --- .../core/common_runtime/eager/execute.cc | 19 +++++++++- .../process_function_library_runtime.cc | 17 ++++++++- tensorflow/core/framework/op_kernel.h | 37 +++++++++++++++++-- tensorflow/core/protobuf/config.proto | 19 ++++++++++ ...rflow.-run-metadata.-function-graphs.pbtxt | 27 ++++++++++++++ .../golden/v1/tensorflow.-run-metadata.pbtxt | 31 ++++++++++++++++ 6 files changed, 144 insertions(+), 6 deletions(-) create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.-function-graphs.pbtxt diff --git a/tensorflow/core/common_runtime/eager/execute.cc b/tensorflow/core/common_runtime/eager/execute.cc index c6e8573cc2..392a0a7a61 100644 --- a/tensorflow/core/common_runtime/eager/execute.cc +++ b/tensorflow/core/common_runtime/eager/execute.cc @@ -932,10 +932,25 @@ Status EagerKernelExecute(EagerContext* ctx, Device* device, { GraphCollector* collector = ctx->GetGraphCollector(); mutex_lock mll(collector->mu); - for (const auto& graph : collector->graphs) { + + // Adding to partition graphs for backward compatibility. 
+ for (const auto& graph : collector->partitioned_graphs) { *ctx->RunMetadataProto()->add_partition_graphs() = graph; } - collector->graphs.clear(); + + if (collector->dirty) { + auto* function_graphs = + ctx->RunMetadataProto()->add_function_graphs(); + *function_graphs->mutable_post_optimization_graph() = + collector->optimized_graph; + *function_graphs->mutable_pre_optimization_graph() = + collector->raw_graph; + for (const auto& graph : collector->partitioned_graphs) { + *function_graphs->add_partition_graphs() = graph; + } + } + + collector->ClearGraphs(); } auto* step_stats = ctx->RunMetadataProto()->mutable_step_stats(); // Lazily initialize the RunMetadata with information about all devices if diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index 950a93671c..608ce8028a 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -526,6 +526,13 @@ Status ProcessFunctionLibraryRuntime::InstantiateMultiDevice( TF_RETURN_IF_ERROR(GetGraphAndRets(function_name, attrs, fdef, lib_def, &graph, &ret_node_names)); + if (options.graph_collector != nullptr) { + GraphDef def; + graph->ToGraphDef(&def); + *def.mutable_library() = lib_def->ReachableDefinitions(def).ToProto(); + options.graph_collector->CollectRawGraph(def); + } + DeviceSet device_set; for (auto d : device_mgr_->ListDevices()) { device_set.AddDevice(d); @@ -592,6 +599,13 @@ Status ProcessFunctionLibraryRuntime::InstantiateMultiDevice( OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, optimization_options)); DumpGraph("After all optimization passes", graph.get()); + if (options.graph_collector != nullptr) { + GraphDef def; + graph->ToGraphDef(&def); + *def.mutable_library() = lib_def->ReachableDefinitions(def).ToProto(); + options.graph_collector->CollectOptimizedGraph(def); + } + std::unordered_map> subgraphs; 
TF_RETURN_IF_ERROR( PartitionFunctionGraph(device_set, std::move(graph), &subgraphs)); @@ -600,7 +614,8 @@ Status ProcessFunctionLibraryRuntime::InstantiateMultiDevice( for (const auto& pair : subgraphs) { GraphDef def; pair.second->ToGraphDef(&def); - options.graph_collector->CollectGraph(def); + *def.mutable_library() = lib_def->ReachableDefinitions(def).ToProto(); + options.graph_collector->CollectPartitionedGraph(def); } } diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index 06b90964ad..f128b40724 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -525,11 +525,42 @@ struct TensorValue { // Used to store partitioned graphs from function-calling ops. struct GraphCollector { mutex mu; - std::vector graphs GUARDED_BY(mu); + std::vector partitioned_graphs GUARDED_BY(mu); + GraphDef raw_graph GUARDED_BY(mu); + GraphDef optimized_graph GUARDED_BY(mu); - void CollectGraph(const GraphDef& graph) { + bool dirty GUARDED_BY(mu); + + GraphCollector() : dirty(false) {} + + void CollectRawGraph(const GraphDef& graph) { + mutex_lock ml(mu); + raw_graph.MergeFrom(graph); + dirty = true; + } + + void CollectOptimizedGraph(const GraphDef& graph) { + mutex_lock ml(mu); + optimized_graph.MergeFrom(graph); + dirty = true; + } + + void CollectPartitionedGraph(const GraphDef& graph) { + mutex_lock ml(mu); + partitioned_graphs.push_back(graph); + dirty = true; + } + + void ClearGraphs() EXCLUSIVE_LOCKS_REQUIRED(mu) { + raw_graph.Clear(); + optimized_graph.Clear(); + partitioned_graphs.clear(); + dirty = false; + } + + bool HasUpdatedGraphs() { mutex_lock ml(mu); - graphs.push_back(graph); + return dirty; } }; diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 44e98542ec..3e24235369 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -520,6 +520,25 @@ message RunMetadata { // Graphs of the partitions 
executed by executors. repeated GraphDef partition_graphs = 3; + + message FunctionGraphs { + // TODO(nareshmodi): Include some sort of function/cache-key identifier? + repeated GraphDef partition_graphs = 1; + + GraphDef pre_optimization_graph = 2; + GraphDef post_optimization_graph = 3; + } + // This is only populated for graphs that are run as functions in TensorFlow + // V2. There will be an entry below for each function that is traced. + // The main use case of the post_optimization_graph and the partition_graphs + // is to give the caller insight into the graphs that were actually run by the + // runtime. Additional information (such as that in step_stats) will match + // these graphs. + // We also include the pre_optimization_graph since it is usually easier to + // read, and is helpful in situations where the caller wants to get a high + // level idea of what the built graph looks like (since the various graph + // optimization passes might change the structure of the graph significantly). + repeated FunctionGraphs function_graphs = 4; } // Defines a connection between two tensors in a `GraphDef`.
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.-function-graphs.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.-function-graphs.pbtxt new file mode 100644 index 0000000000..d2e2f583d2 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.-function-graphs.pbtxt @@ -0,0 +1,27 @@ +path: "tensorflow.RunMetadata.FunctionGraphs" +tf_proto { + descriptor { + name: "FunctionGraphs" + field { + name: "partition_graphs" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.GraphDef" + } + field { + name: "pre_optimization_graph" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.GraphDef" + } + field { + name: "post_optimization_graph" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.GraphDef" + } + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.pbtxt index 1287940326..777b889745 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-metadata.pbtxt @@ -23,5 +23,36 @@ tf_proto { type: TYPE_MESSAGE type_name: ".tensorflow.GraphDef" } + field { + name: "function_graphs" + number: 4 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.RunMetadata.FunctionGraphs" + } + nested_type { + name: "FunctionGraphs" + field { + name: "partition_graphs" + number: 1 + label: LABEL_REPEATED + type: TYPE_MESSAGE + type_name: ".tensorflow.GraphDef" + } + field { + name: "pre_optimization_graph" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.GraphDef" + } + field { + name: "post_optimization_graph" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_MESSAGE + type_name: ".tensorflow.GraphDef" + } + } } } -- GitLab From 1e3981b63885401e9a308d02d6468eec7bad2d4d Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 13 Feb 2019 13:23:27 -0800 Subject: [PATCH 066/351] [TF:XLA] Enable some tests that were mistakenly disabled. PiperOrigin-RevId: 233816788 --- tensorflow/python/BUILD | 1 + tensorflow/python/ops/bitwise_ops_test.py | 3 --- tensorflow/python/ops/special_math_ops_test.py | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 542a8fd55e..79f4c68ac4 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3403,6 +3403,7 @@ cuda_py_test( ":framework_test_lib", ], tags = ["no_windows"], + xla_enable_strict_auto_jit = True, ) cuda_py_test( diff --git a/tensorflow/python/ops/bitwise_ops_test.py b/tensorflow/python/ops/bitwise_ops_test.py index c182874c7f..d154b6759b 100644 --- a/tensorflow/python/ops/bitwise_ops_test.py +++ b/tensorflow/python/ops/bitwise_ops_test.py @@ -35,7 +35,6 @@ class BitwiseOpTest(test_util.TensorFlowTestCase): super(BitwiseOpTest, self).__init__(method_name) @test_util.run_deprecated_v1 - @test_util.disable_xla("This test never passed for XLA") def testBinaryOps(self): dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64] @@ -73,7 +72,6 @@ class BitwiseOpTest(test_util.TensorFlowTestCase): self.assertAllEqual(truth, popcnt_result) @test_util.run_deprecated_v1 - @test_util.disable_xla("This test never passed for XLA") def testInvertOp(self): dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64, dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64] @@ -99,7 +97,6 @@ class BitwiseOpTest(test_util.TensorFlowTestCase): self.assertAllEqual(inverted, expected) @test_util.run_deprecated_v1 - @test_util.disable_xla("This test never passed for XLA") def testShiftsWithPositiveLHS(self): dtype_list = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64] diff --git a/tensorflow/python/ops/special_math_ops_test.py 
b/tensorflow/python/ops/special_math_ops_test.py index 60e0f1f832..41ba060a4b 100644 --- a/tensorflow/python/ops/special_math_ops_test.py +++ b/tensorflow/python/ops/special_math_ops_test.py @@ -119,7 +119,6 @@ class LBetaTest(test.TestCase): special_math_ops.lbeta(x).get_shape()) @test_util.run_in_graph_and_eager_modes - @test_util.disable_xla('This test never passed for XLA') def test_length_1_last_dimension_results_in_one(self): # If there is only one coefficient, the formula still works, and we get one # as the answer, always. -- GitLab From db0ff058a6f6080c82167ffb54500cdf8b2e8dca Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Wed, 13 Feb 2019 13:24:55 -0800 Subject: [PATCH 067/351] Fix Add version. PiperOrigin-RevId: 233817076 --- tensorflow/lite/toco/tflite/export_test.cc | 35 ++++++++++++++++++-- tensorflow/lite/toco/tflite/operator.cc | 12 +++---- tensorflow/lite/toco/tflite/operator_test.cc | 2 ++ 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/tensorflow/lite/toco/tflite/export_test.cc b/tensorflow/lite/toco/tflite/export_test.cc index 58cfb4987f..3577b0f979 100644 --- a/tensorflow/lite/toco/tflite/export_test.cc +++ b/tensorflow/lite/toco/tflite/export_test.cc @@ -51,9 +51,27 @@ class ExportTest : public ::testing::Test { output_array.data_type = ArrayDataType::kFloat; input_model_.operators.emplace_back(op); } else if (name == "Add") { - input_model_.operators.emplace_back(new AddOperator); + auto* op = new AddOperator; + op->inputs = {"input1", "input2"}; + op->outputs = {"output"}; + Array& input1_array = input_model_.GetOrCreateArray(op->inputs[0]); + Array& input2_array = input_model_.GetOrCreateArray(op->inputs[1]); + Array& output_array = input_model_.GetOrCreateArray(op->outputs[0]); + input1_array.data_type = ArrayDataType::kFloat; + input2_array.data_type = ArrayDataType::kFloat; + output_array.data_type = ArrayDataType::kFloat; + input_model_.operators.emplace_back(op); } else if (name == "Sub") { - 
input_model_.operators.emplace_back(new SubOperator); + auto* op = new SubOperator; + op->inputs = {"input1", "input2"}; + op->outputs = {"output"}; + Array& input1_array = input_model_.GetOrCreateArray(op->inputs[0]); + Array& input2_array = input_model_.GetOrCreateArray(op->inputs[1]); + Array& output_array = input_model_.GetOrCreateArray(op->outputs[0]); + input1_array.data_type = ArrayDataType::kFloat; + input2_array.data_type = ArrayDataType::kFloat; + output_array.data_type = ArrayDataType::kFloat; + input_model_.operators.emplace_back(op); } else if (name == "Assert") { auto* op = new TensorFlowAssertOperator; @@ -114,7 +132,18 @@ class ExportTest : public ::testing::Test { output_array.data_type = ArrayDataType::kFloat; input_model_.operators.emplace_back(op); } - input_model_.operators.emplace_back(new AddOperator); + { + auto* op = new AddOperator; + op->inputs = {"input1", "input2"}; + op->outputs = {"output"}; + Array& input1_array = input_model_.GetOrCreateArray(op->inputs[0]); + Array& input2_array = input_model_.GetOrCreateArray(op->inputs[1]); + Array& output_array = input_model_.GetOrCreateArray(op->outputs[0]); + input1_array.data_type = ArrayDataType::kFloat; + input2_array.data_type = ArrayDataType::kFloat; + output_array.data_type = ArrayDataType::kFloat; + input_model_.operators.emplace_back(op); + } } std::vector ExportAndSummarizeOperators(const ExportParams& params) { diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index 03ced9a80a..0a8ac0cca6 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -200,6 +200,12 @@ class Add : public BuiltinOperatorinputs[0]; + const Array& input_array = op_signature.model->GetArray(input_name); + // Version 2 supports signed int8 input types. 
+ if (input_array.data_type == ArrayDataType::kInt8) { + return 2; + } return 1; } }; @@ -219,12 +225,6 @@ class AddN : public BuiltinOperatorinputs[0]; - const Array& input_array = op_signature.model->GetArray(input_name); - // Version 2 supports signed int8 input types. - if (input_array.data_type == ArrayDataType::kInt8) { - return 2; - } return 1; } }; diff --git a/tensorflow/lite/toco/tflite/operator_test.cc b/tensorflow/lite/toco/tflite/operator_test.cc index e136b3e57a..0c3eab3b3b 100644 --- a/tensorflow/lite/toco/tflite/operator_test.cc +++ b/tensorflow/lite/toco/tflite/operator_test.cc @@ -832,6 +832,8 @@ TEST_F(OperatorTest, VersioningSliceTest) { SimpleVersioningTest(); } +TEST_F(OperatorTest, VersioningAddTest) { SimpleVersioningTest(); } + TEST_F(OperatorTest, VersioningSelectTest) { SelectOperator select_op; select_op.inputs = {"input1"}; -- GitLab From 9a97e6a41e92954002000ae9bf80afe62371fe8c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 13:26:29 -0800 Subject: [PATCH 068/351] Disable test that frequently times out. 
PiperOrigin-RevId: 233817412 --- tensorflow/contrib/boosted_trees/estimator_batch/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD index 64e4c4560b..6a062489ee 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD +++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD @@ -200,6 +200,7 @@ py_test( tags = [ "no_gpu", "no_pip_gpu", + "notap", # b/124385673 "notsan", ], deps = [ -- GitLab From 73640d97f67d6bd5ec84375449c6b9bba179606b Mon Sep 17 00:00:00 2001 From: Katherine Wu Date: Wed, 13 Feb 2019 13:27:34 -0800 Subject: [PATCH 069/351] Delete TODO to change the signature constants to match the Keras ModeKeys PiperOrigin-RevId: 233817648 --- tensorflow/python/saved_model/signature_constants.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/saved_model/signature_constants.py b/tensorflow/python/saved_model/signature_constants.py index 8047d0d5c7..525d18d18e 100644 --- a/tensorflow/python/saved_model/signature_constants.py +++ b/tensorflow/python/saved_model/signature_constants.py @@ -137,8 +137,7 @@ tf_export( # Train/Eval API constants. # Not exported while export_all_saved_models is experimental. DEFAULT_TRAIN_SIGNATURE_DEF_KEY = "train" -# TODO(b/123998850): Change default signature key to "test" after making sure -# that TFMA use cases won't break. + DEFAULT_EVAL_SIGNATURE_DEF_KEY = "eval" SUPERVISED_TRAIN_METHOD_NAME = "tensorflow/supervised/training" -- GitLab From f2458f422698197a3d0dae94b535edb8052f088e Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Wed, 13 Feb 2019 13:44:52 -0800 Subject: [PATCH 070/351] Replacing the batching / unbatching solution with a more robust RebatchDataset as default. Numbers seem to suggest that this is slightly positive or neutral with respect to performance.
PiperOrigin-RevId: 233821354 --- .../distribute/python/input_lib_test.py | 29 ------------------- tensorflow/python/distribute/input_lib.py | 20 +------------ 2 files changed, 1 insertion(+), 48 deletions(-) diff --git a/tensorflow/contrib/distribute/python/input_lib_test.py b/tensorflow/contrib/distribute/python/input_lib_test.py index 10a58316ec..204f52b034 100644 --- a/tensorflow/contrib/distribute/python/input_lib_test.py +++ b/tensorflow/contrib/distribute/python/input_lib_test.py @@ -22,7 +22,6 @@ from absl.testing import parameterized from tensorflow.contrib.distribute.python import combinations from tensorflow.contrib.distribute.python import multi_worker_test_base -from tensorflow.python.data.experimental.ops import batching from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import input_lib @@ -214,33 +213,5 @@ class InputIteratorMultiWorkerTest( expected_values, sess) -class SplitDatasetBatchTest(test.TestCase): - - def testBatchDataset(self): - dataset = dataset_ops.Dataset.range(100).batch(20) - split_batch_by = 2 - result_dataset = input_lib._split_dataset_batch(dataset, split_batch_by) - expected_values = [range(i, i+10) for i in range(0, 100, 10)] - result = [self.evaluate(el) for el in result_dataset] - self.assertAllEqual(expected_values, result) - - def testMapAndBatchDataset(self): - dataset = dataset_ops.Dataset.range(100) - dataset = dataset.apply(batching.map_and_batch(lambda x: x, 20)) - split_batch_by = 2 - result_dataset = input_lib._split_dataset_batch(dataset, split_batch_by) - expected_values = [range(i, i+10) for i in range(0, 100, 10)] - result = [self.evaluate(el) for el in result_dataset] - self.assertAllEqual(expected_values, result) - - def testPrefetchDataset(self): - dataset = dataset_ops.Dataset.range(100).batch(20).prefetch(1) - split_batch_by = 2 - result_dataset = input_lib._split_dataset_batch(dataset, split_batch_by) - expected_values 
= [range(i, i+10) for i in range(0, 100, 10)] - result = [self.evaluate(el) for el in result_dataset] - self.assertAllEqual(expected_values, result) - - if __name__ == "__main__": test.main() diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py index 6b13db39c7..1957fc4287 100644 --- a/tensorflow/python/distribute/input_lib.py +++ b/tensorflow/python/distribute/input_lib.py @@ -246,7 +246,7 @@ class DatasetIterator(InputIteratorImpl): """ assert isinstance(input_workers, InputWorkers) if split_batch_by: - dataset = _split_dataset_batch(dataset, split_batch_by) + dataset = batching._RebatchDataset(dataset, split_batch_by) # pylint: disable=protected-access iterators = [] for i, worker in enumerate(input_workers.worker_devices): @@ -405,24 +405,6 @@ def _get_dataset_attributes(dataset): return batch_size, drop_remainder, prefetch_buffer -def _split_dataset_batch(dataset, split_batch_by): - """Divide a batch-ed dataset's batches into smaller batches.""" - batch_size, drop_remainder, prefetch_buffer = ( - _get_dataset_attributes(dataset)) - - if batch_size % split_batch_by: - raise ValueError( - "Batch size %s cannot be sharded evenly across replicas %s" % ( - batch_size, split_batch_by)) - new_batch_size = batch_size // split_batch_by - - dataset = dataset.apply(batching.unbatch()) - dataset = dataset.batch(new_batch_size, drop_remainder=drop_remainder) - if prefetch_buffer is not None: - dataset = dataset.prefetch(prefetch_buffer) - return dataset - - class MultiStepContext(object): """A context object that can be used to capture things when running steps. 
-- GitLab From 70c731e3ede4f671df64840ac91f925ec14fec62 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 13 Feb 2019 13:45:22 -0800 Subject: [PATCH 071/351] Automated rollback of commit 4d9173668fd3ba410532efd901467312b7418752 PiperOrigin-RevId: 233821447 --- tensorflow/cc/BUILD | 1 + tensorflow/cc/saved_model/BUILD | 1 + tensorflow/compiler/tf2xla/BUILD | 1 + tensorflow/core/BUILD | 2 +- tensorflow/core/kernels/BUILD | 83 +------------------------------- tensorflow/tensorflow.bzl | 2 + 6 files changed, 8 insertions(+), 82 deletions(-) diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index a09becc49b..cf6d6050fa 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -150,6 +150,7 @@ cc_library_with_android_deps( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", + "//tensorflow/core:ops", "//tensorflow/core:protos_all_cc", ], ) diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD index 52345a376c..dedd55f16a 100644 --- a/tensorflow/cc/saved_model/BUILD +++ b/tensorflow/cc/saved_model/BUILD @@ -81,6 +81,7 @@ cc_library( ] + if_not_mobile([ "//tensorflow/core:core_cpu", "//tensorflow/core:lib", + "//tensorflow/core:ops", "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", ]) + if_android([ diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 5a1a9435c1..585ee7d59d 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -283,6 +283,7 @@ tf_cc_test( "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", + "//tensorflow/core:testlib", ], ) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index b320a068fc..11237b39e1 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1551,6 +1551,7 @@ cc_library( ":framework_internal", ":lib", ":lib_internal", + ":ops", ":protos_all_cc", ":shape_inference_testutil", ":tensor_testutil", @@ -3940,7 +3941,6 @@ 
tf_cc_test( "ops/cudnn_rnn_ops_test.cc", ], deps = [ - ":cudnn_rnn_ops", "//tensorflow/core", "//tensorflow/core:framework", "//tensorflow/core:lib", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 445fde84f1..e9033f3ffd 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -191,7 +191,6 @@ tf_kernel_library( ]), prefix = "collective_ops", deps = [ - "//tensorflow/core:collective_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", @@ -276,7 +275,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:nn_ops_op_lib", "//third_party/eigen3", ], alwayslink = 1, @@ -367,7 +365,6 @@ tf_kernel_library( "//tensorflow/core/nccl:nccl_lib", "//tensorflow/core:framework", "//tensorflow/core:gpu_headers_lib", - "//tensorflow/core:nccl_ops_op_lib", ]), ) @@ -569,7 +566,6 @@ cc_library( ":concat_lib_hdrs", ":ops_util_hdrs", ":split_lib_hdrs", - "//tensorflow/core:batch_ops_op_lib", "//tensorflow/core:framework_headers_lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/kernels/batching_util:periodic_function_dynamic", @@ -790,7 +786,6 @@ ARRAY_DEPS = [ ":ops_util", ":transpose_functor", "//tensorflow/core:array_grad", - "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -819,7 +814,6 @@ tf_kernel_library( deps = [ "//tensorflow/core:framework_headers_lib", "//tensorflow/core:lib", - "//tensorflow/core:set_ops_op_lib", "//third_party/eigen3", ], ) @@ -1192,7 +1186,6 @@ tf_kernel_library( srcs = ["ragged_gather_op.cc"], deps = [ "//tensorflow/core:framework", - "//tensorflow/core:ragged_array_ops_op_lib", ], ) @@ -1204,7 +1197,6 @@ tf_cc_test( ":ops_testutil", ":ragged_gather_op", "//tensorflow/core:framework", - "//tensorflow/core:ragged_array_ops_op_lib", "//tensorflow/core:test", 
"//tensorflow/core:test_main", "//tensorflow/core:testlib", @@ -1216,7 +1208,6 @@ tf_kernel_library( srcs = ["ragged_range_op.cc"], deps = [ "//tensorflow/core:framework", - "//tensorflow/core:ragged_math_ops_op_lib", ], ) @@ -1227,7 +1218,6 @@ tf_cc_test( ":ops_testutil", ":ragged_range_op", "//tensorflow/core:framework", - "//tensorflow/core:ragged_math_ops_op_lib", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", @@ -1239,7 +1229,6 @@ tf_kernel_library( srcs = ["ragged_tensor_to_sparse_kernel.cc"], deps = [ "//tensorflow/core:framework", - "//tensorflow/core:ragged_conversion_ops_op_lib", ], ) @@ -1252,7 +1241,6 @@ tf_cc_test( ":ragged_tensor_to_sparse_kernel", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:ragged_conversion_ops_op_lib", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", @@ -1266,7 +1254,6 @@ tf_kernel_library( deps = [ ":bounds_check_lib", ":gpu_util_hdrs", - "//tensorflow/core:cudnn_rnn_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", @@ -1342,7 +1329,6 @@ tf_cuda_cc_test( "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", @@ -1867,7 +1853,6 @@ tf_kernel_library( prefix = "candidate_sampler_ops", deps = [ ":range_sampler", - "//tensorflow/core:candidate_sampling_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", ], @@ -1901,7 +1886,6 @@ tf_kernel_library( name = "control_flow_ops", prefix = "control_flow_ops", deps = [ - "//tensorflow/core:control_flow_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", ], @@ -1913,7 +1897,6 @@ tf_kernel_library( deps = [ ":bounds_check", ":ops_util", - "//tensorflow/core:ctc_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", 
"//tensorflow/core/util/ctc:ctc_beam_search_lib", @@ -1994,7 +1977,6 @@ DATA_FLOW_DEPS = [ ":typed_queue", "//third_party/eigen3", "//tensorflow/core:core_cpu", - "//tensorflow/core:data_flow_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", @@ -2059,7 +2041,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:scoped_allocator_ops_op_lib", ], ) @@ -2078,7 +2059,6 @@ tf_cuda_cc_test( "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", "//tensorflow/core:proto_text", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", @@ -2126,7 +2106,6 @@ tf_kernel_library( DYNAMIC_DEPS = [ ":bounds_check", "//tensorflow/core:core_cpu", - "//tensorflow/core:data_flow_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", @@ -2159,7 +2138,6 @@ LOOKUP_DEPS = [ "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:lookup_ops_op_lib", ] tf_kernel_library( @@ -2198,7 +2176,6 @@ tf_kernel_library( deps = [ ":lookup_table_init_op", ":lookup_table_op", - "//tensorflow/core:checkpoint_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//third_party/eigen3", @@ -2209,7 +2186,6 @@ tf_kernel_library( name = "load_and_remap_matrix_op", srcs = ["load_and_remap_matrix_op.cc"], deps = [ - "//tensorflow/core:checkpoint_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", @@ -2354,7 +2330,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:resource_variable_ops_op_lib", "@com_google_absl//absl/strings", ], ) @@ -2372,7 +2347,6 @@ tf_kernel_library( ":fill_functor", "//tensorflow/core:framework", "//tensorflow/core:lib", - 
"//tensorflow/core:list_ops_op_lib", "//third_party/eigen3", ], ) @@ -2383,7 +2357,6 @@ tf_kernel_library( deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:user_ops_op_lib", ], ) @@ -2406,7 +2379,6 @@ tf_kernel_library( "//tensorflow/core:core_cpu", "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", - "//tensorflow/core:functional_ops_op_lib", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//third_party/eigen3", @@ -2419,7 +2391,6 @@ tf_kernel_library( deps = [ "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", - "//tensorflow/core:functional_ops_op_lib", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", @@ -2464,7 +2435,6 @@ IMAGE_DEPS = [ "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:gif_internal", - "//tensorflow/core:image_ops_op_lib", "//tensorflow/core:jpeg_internal", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", @@ -2805,7 +2775,6 @@ cc_library( IO_DEPS = [ ":ops_util", "//tensorflow/core:framework", - "//tensorflow/core:io_ops_op_lib", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", @@ -2849,7 +2818,6 @@ SAVE_RESTORE_DEPS = [ ":bounds_check_lib", ":save_restore_tensor", "//tensorflow/core:framework", - "//tensorflow/core:io_ops_op_lib", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", @@ -2969,7 +2937,6 @@ LINALG_DEPS = [ "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:linalg_ops_op_lib", ] + if_cuda([ ":cuda_solvers", ":transpose_functor", @@ -3119,7 +3086,6 @@ LOGGING_DEPS = [ "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:logging_ops_op_lib", "//tensorflow/core:protos_all_cc", ] @@ -3191,7 +3157,6 @@ tf_kernel_library( ":bounds_check", "//tensorflow/core:framework", 
"//tensorflow/core:lib", - "//tensorflow/core:manip_ops_op_lib", "//third_party/eigen3", ], ) @@ -3223,7 +3188,6 @@ MATH_DEPS = [ "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:math_grad", - "//tensorflow/core:math_ops_op_lib", "//third_party/eigen3", ] @@ -3334,7 +3298,7 @@ tf_kernel_library( tf_kernel_library( name = "cwise_op", prefix = "cwise_op", - deps = MATH_DEPS + ["//tensorflow/core:bitwise_ops_op_lib"], + deps = MATH_DEPS, ) tf_kernel_library( @@ -3353,7 +3317,6 @@ tf_kernel_library( name = "fft_ops", prefix = "fft_ops", deps = MATH_DEPS + [ - "//tensorflow/core:spectral_ops_op_lib", ] + if_cuda([ "//tensorflow/core/platform/default/build_config:cufft_plugin", ]), @@ -3560,10 +3523,7 @@ tf_cuda_cc_test( ":quantized_ops", "//tensorflow/cc:cc_ops", "//tensorflow/cc:client_session", - "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:framework", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", @@ -3801,7 +3761,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:nn_ops_op_lib", ] + select({ ":xsmm_convolutions": [ "@libxsmm_archive//:xsmm_avx", @@ -3831,7 +3790,6 @@ tf_kernel_library( "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:nn_ops_op_lib", ] + if_cuda([ "@cub_archive//:cub", "@local_config_cuda//cuda:cudnn_header", @@ -3851,7 +3809,6 @@ tf_kernel_library( "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:nn_ops_op_lib", ] + if_cuda([ "@local_config_cuda//cuda:cudnn_header", ]), @@ -3898,9 +3855,8 @@ NN_DEPS = [ "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_grad", - "//tensorflow/core:nn_ops_op_lib", "//third_party/eigen3", -] + 
if_mkl(["//tensorflow/core:mkl_nn_ops_op_lib"]) +] tf_kernel_library( name = "batch_norm_op", @@ -4036,7 +3992,6 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_grad", - "//tensorflow/core:nn_ops_op_lib", ] + if_cuda(["@cub_archive//:cub"]), ) @@ -4145,7 +4100,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:nn_ops_op_lib", "//tensorflow/core:stream_executor", "//third_party/eigen3", ], @@ -4189,7 +4143,6 @@ tf_kernel_library( "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:nn_ops_op_lib", "//third_party/eigen3", ], ) @@ -4271,7 +4224,6 @@ cc_library( PARSING_DEPS = [ "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:parsing_ops_op_lib", "//tensorflow/core:proto_text", "//tensorflow/core:protos_all_cc", ] @@ -4340,7 +4292,6 @@ RANDOM_OPS_DEPS = [ "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:random_ops_op_lib", ] tf_kernel_library( @@ -4404,7 +4355,6 @@ tf_kernel_library( ":random_op", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:stateless_random_ops_op_lib", ], ) @@ -4419,8 +4369,6 @@ cc_library( REQUIRED_DEPS = [ "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:no_op_op_lib", - "//tensorflow/core:sendrecv_ops_op_lib", ] tf_kernel_library( @@ -4481,7 +4429,6 @@ cc_library( SPARSE_DEPS = [ "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:sparse_ops_op_lib", ] tf_kernel_library( @@ -4729,7 +4676,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:sdca_ops_op_lib", "//third_party/eigen3", "@farmhash_archive//:farmhash", ], @@ -4769,7 +4715,6 @@ STATE_DEPS = [ "//third_party/eigen3", "//tensorflow/core:framework", 
"//tensorflow/core:lib", - "//tensorflow/core:state_ops_op_lib", ] + if_sycl(["//tensorflow/core:sycl_runtime"]) tf_kernel_library( @@ -4907,7 +4852,6 @@ STRING_DEPS = [ "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:string_ops_op_lib", ] tf_kernel_library( @@ -5058,7 +5002,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:string_ops_op_lib", "//third_party/eigen3", "//third_party/icu/data:conversion_data", "@icu//:common", @@ -5080,7 +5023,6 @@ tf_kernel_library( ":variable_ops", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:training_ops_op_lib", "//third_party/eigen3", ], ) @@ -5142,7 +5084,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:random_ops_op_lib", ], ) @@ -5170,7 +5111,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:random_ops_op_lib", ], ) @@ -6078,12 +6018,9 @@ tf_kernel_library( ":ops_util", ":pooling_ops", ":quantization_utils", - "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//tensorflow/core:math_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", "//third_party/eigen3", "@gemmlowp", ], @@ -6677,7 +6614,6 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:remote_fused_graph_ops_op_lib", ], ) @@ -6832,8 +6768,6 @@ tf_mkl_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:mkl_nn_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", ] + mkl_deps(), ) @@ -6884,8 +6818,6 @@ tf_mkl_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", 
"//tensorflow/core:lib_internal", - "//tensorflow/core:mkl_nn_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", ] + mkl_deps(), ) @@ -6904,8 +6836,6 @@ tf_mkl_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:mkl_nn_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", ] + mkl_deps(), ) @@ -6919,8 +6849,6 @@ tf_mkl_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:mkl_nn_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", "//third_party/eigen3", ] + mkl_deps(), ) @@ -6935,8 +6863,6 @@ tf_mkl_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:mkl_nn_ops_op_lib", - "//tensorflow/core:nn_ops_op_lib", "//third_party/eigen3", ] + mkl_deps(), ) @@ -7139,7 +7065,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "//tensorflow/core:summary_ops_op_lib", "//tensorflow/core/lib/db:sqlite", "//tensorflow/core/summary:schema", "//tensorflow/core/summary:summary_db_writer", @@ -7153,7 +7078,6 @@ tf_kernel_library( "decode_proto_op.cc", ], deps = [ - "//tensorflow/core:decode_proto_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/util/proto:decode", @@ -7168,7 +7092,6 @@ tf_kernel_library( name = "encode_proto_op", srcs = ["encode_proto_op.cc"], deps = [ - "//tensorflow/core:encode_proto_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/util/proto:descriptors", @@ -7186,7 +7109,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "//tensorflow/core:rpc_ops_op_lib", "//tensorflow/core/util/rpc:call_container", "//tensorflow/core/util/rpc:rpc_factory", "//tensorflow/core/util/rpc:rpc_factory_registry", @@ -7199,7 +7121,6 @@ tf_kernel_library( srcs = 
["unicode_script_op.cc"], deps = [ "//tensorflow/core:framework", - "//tensorflow/core:string_ops_op_lib", "@icu//:common", ], ) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 6c8b445edd..988b1e2159 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -612,6 +612,7 @@ def tf_gen_op_wrappers_cc( clean_dep("//tensorflow/core:core_cpu"), clean_dep("//tensorflow/core:framework"), clean_dep("//tensorflow/core:lib"), + clean_dep("//tensorflow/core:ops"), clean_dep("//tensorflow/core:protos_all_cc"), ]) + if_android([ clean_dep("//tensorflow/core:android_tensorflow_lib"), @@ -628,6 +629,7 @@ def tf_gen_op_wrappers_cc( clean_dep("//tensorflow/core:core_cpu"), clean_dep("//tensorflow/core:framework"), clean_dep("//tensorflow/core:lib"), + clean_dep("//tensorflow/core:ops"), clean_dep("//tensorflow/core:protos_all_cc"), ]) + if_android([ clean_dep("//tensorflow/core:android_tensorflow_lib"), -- GitLab From 77c09003f6ca8ae84b90cceb20ac870a3c8c230d Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Wed, 13 Feb 2019 13:50:46 -0800 Subject: [PATCH 072/351] Fix issue: Callbacks do not log values in eager mode when a deferred build model is used. PiperOrigin-RevId: 233822475 --- tensorflow/python/keras/callbacks.py | 57 +++++++++++++++---- tensorflow/python/keras/callbacks_test.py | 45 +++++++++++++++ .../python/keras/engine/training_generator.py | 21 ++++++- 3 files changed, 110 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 926a555f62..70119324ea 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -48,7 +48,6 @@ except ImportError: requests = None -# pylint: disable=protected-access def configure_callbacks(callbacks, model, do_validation=False, @@ -87,20 +86,57 @@ def configure_callbacks(callbacks, # Add additional callbacks during training. 
if mode == ModeKeys.TRAIN: model.history = History() - stateful_metric_names = None - if hasattr(model, 'metrics_names'): - stateful_metric_names = model.metrics_names[1:] # Exclude `loss` - callbacks = [BaseLogger(stateful_metrics=stateful_metric_names) - ] + (callbacks or []) + [model.history] + callbacks = [BaseLogger()] + (callbacks or []) + [model.history] if verbose: - callbacks.append( - ProgbarLogger(count_mode, stateful_metrics=stateful_metric_names)) + callbacks.append(ProgbarLogger(count_mode)) callback_list = CallbackList(callbacks) # Set callback model - callback_model = model._get_callback_model() + callback_model = model._get_callback_model() # pylint: disable=protected-access callback_list.set_model(callback_model) + set_callback_parameters( + callback_list, + model, + do_validation=do_validation, + batch_size=batch_size, + epochs=epochs, + steps_per_epoch=steps_per_epoch, + samples=samples, + verbose=verbose, + mode=mode) + + callback_list.model.stop_training = False + return callback_list + + +def set_callback_parameters(callback_list, + model, + do_validation=False, + batch_size=None, + epochs=None, + steps_per_epoch=None, + samples=None, + verbose=1, + mode=ModeKeys.TRAIN): + """Sets callback parameters. + + Arguments: + callback_list: CallbackList instance. + model: Model being trained. + do_validation: Whether or not validation loop will be run. + batch_size: Number of samples per batch. + epochs: Number of epoch to train. + steps_per_epoch: Number of batches to run per training epoch. + samples: Number of training samples. + verbose: int, 0 or 1. Keras logging verbosity to pass to ProgbarLogger. + mode: String. One of ModeKeys.TRAIN, ModeKeys.TEST, or ModeKeys.PREDICT. + Which loop mode to configure callbacks for. 
+ """ + for cbk in callback_list: + if isinstance(cbk, (BaseLogger, ProgbarLogger)): + cbk.stateful_metrics = model.metrics_names[1:] # Exclude `loss` + # Set callback parameters callback_metrics = [] # When we have deferred build scenario with iterator input, we will compile @@ -119,9 +155,6 @@ def configure_callbacks(callbacks, 'metrics': callback_metrics, } callback_list.set_params(callback_params) - callback_list.model.stop_training = False - return callback_list -# pylint: enable=protected-access def _is_generator_like(data): diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index e188c68c0b..4863e5ceac 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -23,6 +23,7 @@ import csv import os import re import shutil +import sys import threading import unittest @@ -30,9 +31,11 @@ from absl.testing import parameterized import numpy as np from tensorflow.python import keras +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import random_seed from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils +from tensorflow.python.ops import array_ops from tensorflow.python.ops import summary_ops_v2 from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging @@ -219,6 +222,48 @@ class CallbackCountsTest(keras_parameterized.TestCase): class KerasCallbacksTest(keras_parameterized.TestCase): + def _get_model(self, input_shape=None): + layers = [ + keras.layers.Dense(3, activation='relu'), + keras.layers.Dense(2, activation='softmax') + ] + model = testing_utils.get_model_from_layers(layers, input_shape=input_shape) + model.compile( + loss='mse', + optimizer='rmsprop', + metrics=[keras.metrics.CategoricalAccuracy(name='my_acc')], + run_eagerly=testing_utils.should_run_eagerly()) + return model + + @keras_parameterized.run_with_all_model_types + 
@keras_parameterized.run_all_keras_modes + def test_progbar_logging(self): + model = self._get_model(input_shape=(3,)) + + x = array_ops.ones((50, 3)) + y = array_ops.zeros((50, 2)) + dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(10) + expected_log = r'(.*- loss:.*- my_acc:.*)+' + + with self.captureWritesToStream(sys.stdout) as printed: + model.fit(dataset, epochs=2, steps_per_epoch=10) + self.assertRegexpMatches(printed.contents(), expected_log) + + @keras_parameterized.run_with_all_model_types(exclude_models='functional') + @keras_parameterized.run_all_keras_modes + def test_progbar_logging_deferred_model_build(self): + model = self._get_model() + self.assertFalse(model.built) + + x = array_ops.ones((50, 3)) + y = array_ops.zeros((50, 2)) + dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).batch(10) + expected_log = r'(.*- loss:.*- my_acc:.*)+' + + with self.captureWritesToStream(sys.stdout) as printed: + model.fit(dataset, epochs=2, steps_per_epoch=10) + self.assertRegexpMatches(printed.contents(), expected_log) + @keras_parameterized.run_with_all_model_types def test_ModelCheckpoint(self): if h5py is None: diff --git a/tensorflow/python/keras/engine/training_generator.py b/tensorflow/python/keras/engine/training_generator.py index 0172b47e14..909fd38557 100644 --- a/tensorflow/python/keras/engine/training_generator.py +++ b/tensorflow/python/keras/engine/training_generator.py @@ -242,13 +242,32 @@ def model_iteration(model, callbacks._call_batch_hook(mode, 'begin', step, batch_logs) progbar.on_batch_begin(step, batch_logs) + is_deferred = not model._is_compiled batch_outs = batch_function(*batch_data) if not isinstance(batch_outs, list): batch_outs = [batch_outs] - # Aggregate results. if step == 0: aggregator.create(batch_outs) + + if is_deferred: + # Set callbacks params. We do this here when model is compiled only + # in the first iteration of this loop (deferred build scenario). 
+ cbks.set_callback_parameters( + callbacks, + model, + do_validation=do_validation, + batch_size=batch_size, + epochs=epochs, + steps_per_epoch=steps_per_epoch, + samples=num_samples_or_steps, + verbose=verbose, + mode=mode) + + progbar.params = callbacks.params + progbar.params['verbose'] = verbose + + # Aggregate results. aggregator.aggregate(batch_outs) # Callbacks batch end. -- GitLab From bcc12d401f234745a2636d24f5365082a734e42f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 13:57:06 -0800 Subject: [PATCH 073/351] Change xla::ExecuteWithDeviceBuffers() to take a pointer, not a unique_ptr(), to allow re-use of Executable. This is to allow the XLA executable to be used multiple times without recompiling or marshalling/unmarshalling. PiperOrigin-RevId: 233823805 --- tensorflow/compiler/xla/service/hlo_runner.cc | 6 +++--- tensorflow/compiler/xla/service/hlo_runner.h | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 84399f17e5..5a5401e351 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -176,7 +176,7 @@ StatusOr HloRunner::Execute( TransferLiteralsToDevice(arguments)); TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result, ExecuteWithDeviceBuffers( - /*module=*/std::move(executable), + /*executable=*/executable.get(), /*arguments=*/argument_buffers, /*profile=*/profile)); return TransferLiteralFromDevice(result); @@ -235,7 +235,7 @@ StatusOr HloRunner::ExecuteWithDeviceBuffers( } StatusOr HloRunner::ExecuteWithDeviceBuffers( - std::unique_ptr executable, + Executable* executable, const absl::Span arguments, ExecutionProfile* profile) { // Get service run options. 
@@ -254,7 +254,7 @@ StatusOr HloRunner::ExecuteWithDeviceBuffers( } StatusOr HloRunner::ExecuteWithDeviceBuffers( - std::unique_ptr executable, + Executable* executable, const absl::Span arguments, ExecutionProfile* profile) { std::vector argument_pointers; diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index a6e6015d6a..fb897aa959 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -144,13 +144,16 @@ class HloRunner { const absl::Span arguments, bool run_hlo_passes = true, ExecutionProfile* profile = nullptr); + // In the following two calls, "executable" is not a unique_ptr to allow + // reuse of the Executable. This call may update the profile information in + // *executable. StatusOr ExecuteWithDeviceBuffers( - std::unique_ptr executable, + Executable* executable, const absl::Span arguments, ExecutionProfile* profile = nullptr); StatusOr ExecuteWithDeviceBuffers( - std::unique_ptr executable, + Executable* executable, const absl::Span arguments, ExecutionProfile* profile = nullptr); -- GitLab From e9deb127980812d2925d701c919f094c977b359f Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Wed, 13 Feb 2019 13:57:38 -0800 Subject: [PATCH 074/351] Expose tf.summary.record_if(condition) context manager in TF 2.0 This generalizes the TF 1.x contrib summary APIs always_record_summaries(), never_record_summaries(), and record_summaries_every_n_global_steps(). The new context manager accepts a "condition" that can be a constant boolean, a boolean tensor value, or a callable returning such. 
PiperOrigin-RevId: 233823923 --- tensorflow/python/eager/context.py | 12 ++++++- tensorflow/python/ops/summary_ops_v2.py | 35 +++++++------------ .../api/golden/v2/tensorflow.summary.pbtxt | 4 +++ 3 files changed, 27 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 23184143d6..e3c7bb575f 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -141,8 +141,8 @@ class _EagerContext(threading.local): self.mode = default_execution_mode self.is_eager = default_execution_mode == EAGER_MODE self.scope_name = "" - self.recording_summaries = False self.summary_writer_resource = None + self.recording_summaries = None self.scalar_cache = {} self._ones_rank_cache = None self._zeros_cache = None @@ -520,6 +520,16 @@ class Context(object): """Sets summary writer resource.""" self._eager_context.summary_writer_resource = resource + @property + def recording_summaries(self): + """Returns summary recording condition.""" + return self._eager_context.recording_summaries + + @recording_summaries.setter + def recording_summaries(self, condition): + """Sets summary recording condition.""" + self._eager_context.recording_summaries = condition + @property def device_name(self): """Returns the device name for the current thread.""" diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py index 168cb97554..835e79bb08 100644 --- a/tensorflow/python/ops/summary_ops_v2.py +++ b/tensorflow/python/ops/summary_ops_v2.py @@ -45,11 +45,6 @@ from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -# Dictionary mapping graph keys to a boolean Tensor (or callable returning -# a boolean Tensor) indicating whether we should record summaries for the -# graph identified by the key of the dictionary. -_SHOULD_RECORD_SUMMARIES = {} - # A global dictionary mapping graph keys to a list of summary writer init ops. 
_SUMMARY_WRITER_INIT_OP = {} @@ -61,10 +56,8 @@ _USER_NAME_PATTERNS = re.compile(r"^[a-z]([-a-z0-9]{0,29}[a-z0-9])?$", re.I) def _should_record_summaries_internal(): """Returns boolean Tensor if summaries should/shouldn't be recorded, or None. """ - global _SHOULD_RECORD_SUMMARIES - key = ops.get_default_graph()._graph_key # pylint: disable=protected-access - should = _SHOULD_RECORD_SUMMARIES.get(key) - return should() if callable(should) else should + condition = context.context().recording_summaries + return condition() if callable(condition) else condition def _should_record_summaries_v2(): @@ -83,32 +76,28 @@ def should_record_summaries(): return False if result is None else result +@tf_export("summary.record_if", v1=[]) @tf_contextlib.contextmanager -def _record_summaries(boolean=True): +def record_if(condition): """Sets summary recording on or off per the provided boolean value. The provided value can be a python boolean, a scalar boolean Tensor, or or a callable providing such a value; if a callable is passed it will be - invoked each time should_record_summaries() is called to determine whether - summary writing should be enabled. + invoked on-demand to determine whether summary writing will occur. Args: - boolean: can be True, False, a bool Tensor, or a callable providing such. - Defaults to True. + condition: can be True, False, a bool Tensor, or a callable providing such. Yields: Returns a context manager that sets this value on enter and restores the previous value on exit. 
""" - # TODO(nickfelt): make this threadlocal - global _SHOULD_RECORD_SUMMARIES - key = ops.get_default_graph()._graph_key # pylint: disable=protected-access - old = _SHOULD_RECORD_SUMMARIES.setdefault(key, None) + old = context.context().recording_summaries try: - _SHOULD_RECORD_SUMMARIES[key] = boolean + context.context().recording_summaries = condition yield finally: - _SHOULD_RECORD_SUMMARIES[key] = old + context.context().recording_summaries = old # TODO(apassos) consider how to handle local step here. @@ -120,17 +109,17 @@ def record_summaries_every_n_global_steps(n, global_step=None): should = lambda: math_ops.equal(global_step % n, 0) if not context.executing_eagerly(): should = should() - return _record_summaries(should) + return record_if(should) def always_record_summaries(): """Sets the should_record_summaries Tensor to always true.""" - return _record_summaries(True) + return record_if(True) def never_record_summaries(): """Sets the should_record_summaries Tensor to always false.""" - return _record_summaries(False) + return record_if(False) @tf_export("summary.SummaryWriter", v1=[]) diff --git a/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt index c59f1b8474..335489865b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt @@ -16,6 +16,10 @@ tf_module { name: "import_event" argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "record_if" + argspec: "args=[\'condition\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "summary_scope" argspec: "args=[\'name\', \'default_name\', \'values\'], varargs=None, keywords=None, defaults=[\'summary\', \'None\'], " -- GitLab From 85deaa11cae878ba2c0e5284085956f75434b5b2 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 13 Feb 2019 13:58:47 -0800 Subject: [PATCH 075/351] [TF:XLA] Add XlaSelfAdjointEigOp PiperOrigin-RevId: 233824166 --- tensorflow/compiler/tests/BUILD | 23 +++++++ .../tests/self_adjoint_eig_op_test.py | 62 +++++++++++++++++ tensorflow/compiler/tf2xla/kernels/BUILD | 3 + .../tf2xla/kernels/xla_self_adjoint_eig_op.cc | 66 +++++++++++++++++++ tensorflow/compiler/tf2xla/ops/xla_ops.cc | 35 ++++++++++ tensorflow/compiler/tf2xla/python/xla.py | 4 ++ tensorflow/compiler/xla/client/lib/BUILD | 13 ++-- ...f_adjoint_eigen.cc => self_adjoint_eig.cc} | 34 ++++++++-- ...elf_adjoint_eigen.h => self_adjoint_eig.h} | 14 ++-- ...eigen_test.cc => self_adjoint_eig_test.cc} | 64 +++++++++--------- tensorflow/python/kernel_tests/BUILD | 2 +- 11 files changed, 267 insertions(+), 53 deletions(-) create mode 100644 tensorflow/compiler/tests/self_adjoint_eig_op_test.py create mode 100644 tensorflow/compiler/tf2xla/kernels/xla_self_adjoint_eig_op.cc rename tensorflow/compiler/xla/client/lib/{self_adjoint_eigen.cc => self_adjoint_eig.cc} (93%) rename tensorflow/compiler/xla/client/lib/{self_adjoint_eigen.h => self_adjoint_eig.h} (71%) rename tensorflow/compiler/xla/client/lib/{self_adjoint_eigen_test.cc => self_adjoint_eig_test.cc} (84%) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 9b6ca4092c..7c1e0daf0b 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -250,6 +250,29 @@ tf_xla_py_test( ], ) +tf_xla_py_test( + name = "self_adjoint_eig_op_test", + size = "medium", + srcs = ["self_adjoint_eig_op_test.py"], + # TODO(kuny): remove it after b/124377352 is fixed. 
+ disabled_backends = [ + "cpu", + "gpu", + "cpu_ondemand", + ], + tags = ["optonly"], + deps = [ + ":xla_test", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework", + "//tensorflow/python:map_fn", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:training", + "@absl_py//absl/testing:parameterized", + ], +) + tf_xla_py_test( name = "matrix_triangular_solve_op_test", size = "small", diff --git a/tensorflow/compiler/tests/self_adjoint_eig_op_test.py b/tensorflow/compiler/tests/self_adjoint_eig_op_test.py new file mode 100644 index 0000000000..cfb5c82b22 --- /dev/null +++ b/tensorflow/compiler/tests/self_adjoint_eig_op_test.py @@ -0,0 +1,62 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for tensorflow.ops.self_adjoint_eig.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools +from absl.testing import parameterized +import numpy as np + +from tensorflow.compiler.tests import xla_test +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import linalg_ops +from tensorflow.python.platform import test + + +class SelfAdjointEigOpTest(xla_test.XLATestCase, parameterized.TestCase): + + def _test(self, dtype, shape): + np.random.seed(1) + x_np = np.random.uniform( + low=-1.0, high=1.0, size=np.prod(shape)).reshape(shape).astype(dtype) + x_np = x_np + np.swapaxes(x_np, -1, -2) + n = shape[-1] + + e_np, _ = np.linalg.eigh(x_np) + with self.cached_session() as sess: + x_tf = array_ops.placeholder(dtype) + with self.test_scope(): + e, v = linalg_ops.self_adjoint_eig(x_tf) + e_val, v_val = sess.run([e, v], feed_dict={x_tf: x_np}) + + v_diff = np.matmul(v_val, np.swapaxes(v_val, -1, -2)) - np.eye(n) + self.assertAlmostEqual(np.mean(v_diff**2), 0.0, delta=1e-6) + self.assertAlmostEqual(np.mean((e_val - e_np)**2), 0.0, delta=1e-6) + + SIZES = [1, 2, 5, 10, 32] + DTYPES = [np.float32] + PARAMS = itertools.product(SIZES, DTYPES) + + @parameterized.parameters(*PARAMS) + def testSelfAdjointEig(self, n, dtype): + for batch_dims in [(), (3,)] + [(3, 2)] * (n < 10): + self._test(dtype, batch_dims + (n, n)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index b3f050c52b..343568b239 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -107,11 +107,13 @@ tf_kernel_library( "xla_pad_op.cc", "xla_reduce_op.cc", "xla_select_and_scatter_op.cc", + "xla_self_adjoint_eig_op.cc", ], hdrs = [ "index_ops.h", "shape_util.h", ], + tags = 
["optonly"], deps = [ ":conv_op_helpers", ":if_op", @@ -143,6 +145,7 @@ tf_kernel_library( "//tensorflow/compiler/xla/client/lib:prng", "//tensorflow/compiler/xla/client/lib:qr", "//tensorflow/compiler/xla/client/lib:quantize", + "//tensorflow/compiler/xla/client/lib:self_adjoint_eig", "//tensorflow/compiler/xla/client/lib:sorting", "//tensorflow/core:bitwise_ops_op_lib", "//tensorflow/core:control_flow_ops_op_lib", diff --git a/tensorflow/compiler/tf2xla/kernels/xla_self_adjoint_eig_op.cc b/tensorflow/compiler/tf2xla/kernels/xla_self_adjoint_eig_op.cc new file mode 100644 index 0000000000..233ac8e7b4 --- /dev/null +++ b/tensorflow/compiler/tf2xla/kernels/xla_self_adjoint_eig_op.cc @@ -0,0 +1,66 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" +#include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/lib/self_adjoint_eig.h" +#include "tensorflow/core/lib/core/bits.h" + +namespace tensorflow { +namespace { + +class XlaSelfAdjointEigOp : public XlaOpKernel { + public: + explicit XlaSelfAdjointEigOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("lower", &lower_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("max_iter", &max_iter_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("epsilon", &epsilon_)); + } + void Compile(XlaOpKernelContext* ctx) override { + auto result = + xla::SelfAdjointEig(ctx->Input(0), lower_, max_iter_, epsilon_); + ctx->SetOutput(0, result.w); + ctx->SetOutput(1, result.v); + } + + private: + bool lower_; + int32 max_iter_; + float epsilon_; +}; + +class SelfAdjointEigV2Op : public XlaOpKernel { + public: + explicit SelfAdjointEigV2Op(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} + void Compile(XlaOpKernelContext* ctx) override { + const TensorShape input_shape = ctx->InputShape("input"); + int n = input_shape.dim_size(input_shape.dims() - 1); + // This is based on heuristics that approx log(n) sweep updates are needed. + // Note: the heuristics provides no theoretical guarantee, max_iter=100 and + // epsilon should be used to determine exit condition. 
+ int max_iter = 2 * tensorflow::Log2Ceiling(n); + auto result = xla::SelfAdjointEig(ctx->Input(0), true, max_iter, 1e-6); + ctx->SetOutput(0, result.w); + ctx->SetOutput(1, result.v); + } +}; + +REGISTER_XLA_OP(Name("XlaSelfAdjointEig").TypeConstraint("T", kFloatTypes), + XlaSelfAdjointEigOp); +REGISTER_XLA_OP(Name("SelfAdjointEigV2").TypeConstraint("T", kFloatTypes), + SelfAdjointEigV2Op); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index af641131ed..ccd58071d3 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -56,6 +56,41 @@ lhs_output: the broadcasted LHS tensor rhs_output: the broadcasted RHS tensor )doc"); +REGISTER_OP("XlaSelfAdjointEig") + .Input("a: T") + .Attr("lower: bool") + .Attr("max_iter: int") + .Attr("epsilon: float") + .Output("w: T") + .Output("v: T") + .SetShapeFn(shape_inference::UnknownShape) + .Attr("T: numbertype") + .Doc(R"doc( +Computes the eigen decomposition of a batch of self-adjoint matrices +(Note: Only real inputs are supported). + +Computes the eigenvalues and eigenvectors of the innermost N-by-N matrices in +tensor such that tensor[...,:,:] * v[..., :,i] = e[..., i] * v[...,:,i], for +i=0...N-1. + +a: the input tensor. + +lower: a boolean specifies whether the calculation is done with the lower + triangular part or the upper triangular part. + +max_iter: maximum number of sweep update, i.e., the whole lower triangular + part or upper triangular part based on parameter lower. Heuristically, it has + been argued that approximatly logN sweeps are needed in practice (Ref: Golub & + van Loan "Matrix Computation"). + +epsilon: the tolerance ratio. + +w: The eigenvalues in ascending order, each repeated according to its + multiplicity. +v: The column v[..., :, i] is the normalized eigenvector corresponding to the + eigenvalue w[..., i]. 
+)doc"); + REGISTER_OP("XlaConv") .Input("lhs: T") .Input("rhs: T") diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py index 345193c936..de4710d03a 100644 --- a/tensorflow/compiler/tf2xla/python/xla.py +++ b/tensorflow/compiler/tf2xla/python/xla.py @@ -291,6 +291,10 @@ def dot_general(lhs, rhs, dimension_numbers, precision_config=None, name=None): name=name) +def self_adjoint_eig(a, lower, max_iter, epsilon): + return gen_xla_ops.xla_self_adjoint_eig(a, lower, max_iter, epsilon) + + dynamic_slice = gen_xla_ops.xla_dynamic_slice dynamic_update_slice = gen_xla_ops.xla_dynamic_update_slice diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD index 9461343542..c5dea5f180 100644 --- a/tensorflow/compiler/xla/client/lib/BUILD +++ b/tensorflow/compiler/xla/client/lib/BUILD @@ -452,11 +452,12 @@ cc_library( ) cc_library( - name = "self_adjoint_eigen", - srcs = ["self_adjoint_eigen.cc"], - hdrs = ["self_adjoint_eigen.h"], + name = "self_adjoint_eig", + srcs = ["self_adjoint_eig.cc"], + hdrs = ["self_adjoint_eig.h"], deps = [ ":arithmetic", + ":comparators", ":constants", ":loops", ":math", @@ -473,8 +474,8 @@ cc_library( ) xla_test( - name = "self_adjoint_eigen_test", - srcs = ["self_adjoint_eigen_test.cc"], + name = "self_adjoint_eig_test", + srcs = ["self_adjoint_eig_test.cc"], blacklisted_backends = [ "cpu", "gpu", @@ -486,7 +487,7 @@ xla_test( ":arithmetic", ":constants", ":matrix", - ":self_adjoint_eigen", + ":self_adjoint_eig", "//tensorflow/compiler/xla:array2d", "//tensorflow/compiler/xla:array3d", "//tensorflow/compiler/xla:literal", diff --git a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen.cc b/tensorflow/compiler/xla/client/lib/self_adjoint_eig.cc similarity index 93% rename from tensorflow/compiler/xla/client/lib/self_adjoint_eigen.cc rename to tensorflow/compiler/xla/client/lib/self_adjoint_eig.cc index 1dc87c29a9..546127e462 100644 --- 
a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen.cc +++ b/tensorflow/compiler/xla/client/lib/self_adjoint_eig.cc @@ -13,12 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/client/lib/self_adjoint_eigen.h" +#include "tensorflow/compiler/xla/client/lib/self_adjoint_eig.h" #include #include #include "tensorflow/compiler/xla/client/lib/arithmetic.h" +#include "tensorflow/compiler/xla/client/lib/comparators.h" #include "tensorflow/compiler/xla/client/lib/constants.h" #include "tensorflow/compiler/xla/client/lib/loops.h" #include "tensorflow/compiler/xla/client/lib/math.h" @@ -341,6 +342,27 @@ StatusOr> WhileLoopFn( return values; } +StatusOr SortByEigenvalues(SelfAdjointEigResult result) { + XlaBuilder* builder = result.v.builder(); + TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(result.v)); + const int64 num_dims = shape.rank(); + auto dimensions = shape.dimensions(); + + std::vector broadcast_dims(num_dims - 1); + std::iota(broadcast_dims.begin(), broadcast_dims.end(), 0); + broadcast_dims[num_dims - 2] = num_dims - 1; + result.w = BroadcastInDim(result.w, dimensions, broadcast_dims); + + XlaOp sort_result = + Sort({result.w, result.v}, + CreateScalarLtComputation( + {shape.element_type(), shape.element_type()}, builder), + num_dims - 1); + result.w = GetMatrixDiagonal(GetTupleElement(sort_result, 0)); + result.v = GetTupleElement(sort_result, 1); + return result; +} + } // namespace // This is the cyclic Jacobi iteration. Please note that the eigenvalues are @@ -373,11 +395,11 @@ StatusOr> WhileLoopFn( // // TODO(kuny): Implement parallel order Jacobi. 
// -SelfAdjointEigenResult SelfAdjointEigen(XlaOp a, bool lower, int64 max_iter, - float epsilon) { +SelfAdjointEigResult SelfAdjointEig(XlaOp a, bool lower, int64 max_iter, + float epsilon) { XlaBuilder* builder = a.builder(); auto return_error = [&](const Status& status) { - SelfAdjointEigenResult result; + SelfAdjointEigResult result; result.v = builder->ReportError(status); result.w = builder->ReportError(status); return result; @@ -439,11 +461,11 @@ SelfAdjointEigenResult SelfAdjointEigen(XlaOp a, bool lower, int64 max_iter, auto output = output_with_status.ValueOrDie(); - SelfAdjointEigenResult result; + SelfAdjointEigResult result; result.v = output[1]; result.w = GetMatrixDiagonal(output[2]); - return result; + return SortByEigenvalues(result).ValueOrDie(); } } // namespace xla diff --git a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen.h b/tensorflow/compiler/xla/client/lib/self_adjoint_eig.h similarity index 71% rename from tensorflow/compiler/xla/client/lib/self_adjoint_eigen.h rename to tensorflow/compiler/xla/client/lib/self_adjoint_eig.h index 49fc17aa27..2a089891d6 100644 --- a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen.h +++ b/tensorflow/compiler/xla/client/lib/self_adjoint_eig.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIGEN_H_ -#define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIGEN_H_ +#ifndef TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIG_H_ +#define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIG_H_ #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -23,20 +23,18 @@ namespace xla { // The eigenvalue decomposition of a symmetric matrix, the original matrix is // recovered by v * w * v_t. 
-struct SelfAdjointEigenResult { +struct SelfAdjointEigResult { // The i-th column is the normalized eigenvector corresponding to the // eigenvalue w[i]. Will return a matrix object if a is a matrix object. XlaOp v; - // TODO(kuny): Sort the eigenvalues. // The eigenvalues in ascending order, each repeated according to its // multiplicity. XlaOp w; }; -SelfAdjointEigenResult SelfAdjointEigen(XlaOp a, bool lower = true, - int64 max_iter = 100, - float epsilon = 1e-6); +SelfAdjointEigResult SelfAdjointEig(XlaOp a, bool lower = true, + int64 max_iter = 100, float epsilon = 1e-6); } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIGEN_H_ +#endif // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SELF_ADJOINT_EIG_H_ diff --git a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen_test.cc b/tensorflow/compiler/xla/client/lib/self_adjoint_eig_test.cc similarity index 84% rename from tensorflow/compiler/xla/client/lib/self_adjoint_eigen_test.cc rename to tensorflow/compiler/xla/client/lib/self_adjoint_eig_test.cc index aa8fa816c0..c8875dff7b 100644 --- a/tensorflow/compiler/xla/client/lib/self_adjoint_eigen_test.cc +++ b/tensorflow/compiler/xla/client/lib/self_adjoint_eig_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/client/lib/self_adjoint_eigen.h" +#include "tensorflow/compiler/xla/client/lib/self_adjoint_eig.h" #include "tensorflow/compiler/xla/array2d.h" #include "tensorflow/compiler/xla/array3d.h" @@ -32,7 +32,7 @@ limitations under the License. 
namespace xla { -class SelfAdjointEigenTest : public ClientLibraryTestBase { +class SelfAdjointEigTest : public ClientLibraryTestBase { protected: void SetUp() override { ClientLibraryTestBase::SetUp(); @@ -71,7 +71,7 @@ class SelfAdjointEigenTest : public ClientLibraryTestBase { } void TearDown() override { ClientLibraryTestBase::TearDown(); } - Array3D get_unit_matrix_3d(const Array3D& matrix) { + Array3D GetUnitMatrix3D(const Array3D& matrix) { Array3D result(matrix.n1(), matrix.n2(), matrix.n3(), 0.0); for (int i = 0; i < matrix.n1(); ++i) { for (int j = 0; j < matrix.n2(); ++j) { @@ -100,7 +100,7 @@ class SelfAdjointEigenTest : public ClientLibraryTestBase { return result; } - XlaOp ComputeMatmulVWVt(SelfAdjointEigenResult result, XlaBuilder* builder) { + XlaOp ComputeMatmulVWVt(SelfAdjointEigResult result, XlaBuilder* builder) { Shape shape = builder->GetShape(result.v).ValueOrDie(); std::vector out_dims = shape.dimensions(); std::vector broadcast_dims(shape.rank() - 1); @@ -140,69 +140,69 @@ class SelfAdjointEigenTest : public ClientLibraryTestBase { Array2D wrong_type_4x4_; }; -XLA_TEST_F(SelfAdjointEigenTest, Test_VWVt_EQ_A_2x4x4) { +XLA_TEST_F(SelfAdjointEigTest, Test_VWVt_EQ_A_2x4x4) { XlaBuilder builder(TestName()); XlaOp a; auto a_data = CreateR3Parameter(batch_3d_4x4_, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); ComputeMatmulVWVt(result, &builder); ComputeAndCompareR3(&builder, batch_3d_4x4_, {a_data.get()}, ErrorSpec(1e-3, 1e-3)); } -XLA_TEST_F(SelfAdjointEigenTest, Test_VWVt_EQ_A_Lower_2x4x4) { +XLA_TEST_F(SelfAdjointEigTest, Test_VWVt_EQ_A_Lower_2x4x4) { XlaBuilder builder(TestName()); XlaOp a; auto a_data = CreateR3Parameter( ExtractTriangularMatrix(batch_3d_4x4_, true), 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); ComputeMatmulVWVt(result, &builder); ComputeAndCompareR3(&builder, batch_3d_4x4_, {a_data.get()}, ErrorSpec(1e-3, 1e-3)); } 
-XLA_TEST_F(SelfAdjointEigenTest, Test_VWVt_EQ_A_Upper_2x4x4) { +XLA_TEST_F(SelfAdjointEigTest, Test_VWVt_EQ_A_Upper_2x4x4) { XlaBuilder builder(TestName()); XlaOp a; auto a_data = CreateR3Parameter( ExtractTriangularMatrix(batch_3d_4x4_, false), 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a, false); + auto result = SelfAdjointEig(a, false); ComputeMatmulVWVt(result, &builder); ComputeAndCompareR3(&builder, batch_3d_4x4_, {a_data.get()}, ErrorSpec(1e-3, 1e-3)); } -XLA_TEST_F(SelfAdjointEigenTest, Test_Orthogonality_2x4x4) { +XLA_TEST_F(SelfAdjointEigTest, Test_Orthogonality_2x4x4) { XlaBuilder builder(TestName()); XlaOp a; auto a_data = CreateR3Parameter(batch_3d_4x4_, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); BatchDot(result.v, TransposeInMinorDims(result.v), PrecisionConfig::HIGHEST); - ComputeAndCompareR3(&builder, get_unit_matrix_3d(batch_3d_4x4_), + ComputeAndCompareR3(&builder, GetUnitMatrix3D(batch_3d_4x4_), {a_data.get()}, ErrorSpec(1e-3, 1e-3)); } -XLA_TEST_F(SelfAdjointEigenTest, Test_VtWV_EQ_A_Rank_Deficient_4x4) { +XLA_TEST_F(SelfAdjointEigTest, Test_VtWV_EQ_A_Rank_Deficient_4x4) { XlaBuilder builder(TestName()); XlaOp a; auto a_data = CreateR2Parameter(low_rank_4x4_, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); ComputeMatmulVWVt(result, &builder); ComputeAndCompareR2(&builder, low_rank_4x4_, {a_data.get()}, ErrorSpec(1e-3, 1e-3)); } -XLA_TEST_F(SelfAdjointEigenTest, Test_Eigen_8x8) { +XLA_TEST_F(SelfAdjointEigTest, Test_Eigen_8x8) { XlaBuilder builder(TestName()); // This is computed by numpy.linalg.eigh with float32. 
@@ -211,21 +211,21 @@ XLA_TEST_F(SelfAdjointEigenTest, Test_Eigen_8x8) { XlaOp a; auto a_data = CreateR2Parameter(matrix2d_8x8_, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); - Sort(result.w); + auto result = SelfAdjointEig(a); + Add(result.w, ZerosLike(result.w)); ComputeAndCompareR1(&builder, expected, {a_data.get()}, ErrorSpec(1e-3, 1e-3)); } -XLA_TEST_F(SelfAdjointEigenTest, Test_Orthogonality_8x8) { +XLA_TEST_F(SelfAdjointEigTest, Test_Orthogonality_8x8) { XlaBuilder builder(TestName()); float expected_vals = 1e-3; XlaOp a; auto a_data = CreateR2Parameter(matrix2d_8x8_, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); // np.sum(norm(eye(n) - matmul(conj(T(v)), v)) / n**2 GetAverageAbsoluteError(IdentityMatrix(&builder, F32, 8, 8), BatchDot(TransposeInMinorDims(result.v), result.v), @@ -235,75 +235,75 @@ XLA_TEST_F(SelfAdjointEigenTest, Test_Orthogonality_8x8) { ErrorSpec(1e-3, 1e-3)); } -XLA_TEST_F(SelfAdjointEigenTest, Wrong_Type_Int) { +XLA_TEST_F(SelfAdjointEigTest, Wrong_Type_Int) { XlaBuilder builder(TestName()); XlaOp a; auto a_data = CreateR2Parameter(wrong_type_4x4_, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); EXPECT_FALSE(result.v.valid()); EXPECT_FALSE(result.w.valid()); } -XLA_TEST_F(SelfAdjointEigenTest, Various_Size_Random_Matrix_8x8) { +XLA_TEST_F(SelfAdjointEigTest, Various_Size_Random_Matrix_8x8) { XlaBuilder builder(TestName()); int size = 8; Array2D a_val = GenerateRandomSymmetricMatrix(size); XlaOp a; auto a_data = CreateR2Parameter(a_val, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder); ComputeAndCompareR0(&builder, 1e-3, {a_data.get()}, ErrorSpec(1e-3, 1e-3)); } -XLA_TEST_F(SelfAdjointEigenTest, Various_Size_Random_Matrix_16x16) { +XLA_TEST_F(SelfAdjointEigTest, Various_Size_Random_Matrix_16x16) { XlaBuilder 
builder(TestName()); int size = 16; Array2D a_val = GenerateRandomSymmetricMatrix(size); XlaOp a; auto a_data = CreateR2Parameter(a_val, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder); ComputeAndCompareR0(&builder, 1e-3, {a_data.get()}, ErrorSpec(1e-3, 1e-3)); } -XLA_TEST_F(SelfAdjointEigenTest, Various_Size_Random_Matrix_32x32) { +XLA_TEST_F(SelfAdjointEigTest, Various_Size_Random_Matrix_32x32) { XlaBuilder builder(TestName()); int size = 32; Array2D a_val = GenerateRandomSymmetricMatrix(size); XlaOp a; auto a_data = CreateR2Parameter(a_val, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder); ComputeAndCompareR0(&builder, 1e-3, {a_data.get()}, ErrorSpec(1e-3, 1e-3)); } -XLA_TEST_F(SelfAdjointEigenTest, Various_Size_Random_Matrix_256x256) { +XLA_TEST_F(SelfAdjointEigTest, Various_Size_Random_Matrix_256x256) { XlaBuilder builder(TestName()); int size = 256; Array2D a_val = GenerateRandomSymmetricMatrix(size); XlaOp a; auto a_data = CreateR2Parameter(a_val, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder); ComputeAndCompareR0(&builder, 1e-3, {a_data.get()}, ErrorSpec(1e-3, 1e-3)); } -XLA_TEST_F(SelfAdjointEigenTest, Various_Size_Random_Matrix_512x512) { +XLA_TEST_F(SelfAdjointEigTest, Various_Size_Random_Matrix_512x512) { XlaBuilder builder(TestName()); int size = 512; Array2D a_val = GenerateRandomSymmetricMatrix(size); XlaOp a; auto a_data = CreateR2Parameter(a_val, 0, "a", &builder, &a); - auto result = SelfAdjointEigen(a); + auto result = SelfAdjointEig(a); GetAverageAbsoluteError(ComputeMatmulVWVt(result, &builder), a, &builder); ComputeAndCompareR0(&builder, 1e-3, {a_data.get()}, diff --git 
a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index e142bbb330..999863add4 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -3285,7 +3285,7 @@ cuda_py_test( data = ["//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files"], shard_count = 20, tags = ["no_windows"], - xla_enable_strict_auto_jit = True, + # TODO(kuny): Add xla_enable_strict_auto_jit = True after b/124377352 is fixed. ) cuda_py_test( -- GitLab From 938676970a36fd569223ed588925edc6bb411730 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Wed, 13 Feb 2019 13:59:30 -0800 Subject: [PATCH 076/351] [XLA] Add replicated execution for HloTestBase PiperOrigin-RevId: 233824321 --- tensorflow/compiler/xla/tests/hlo_test_base.cc | 11 +++++++++++ tensorflow/compiler/xla/tests/hlo_test_base.h | 5 +++++ 2 files changed, 16 insertions(+) diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc index d9d54fd255..0151981ef1 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -205,6 +205,17 @@ Literal HloTestBase::ExecuteAndTransfer(std::unique_ptr module, return test_runner_.Execute(std::move(module), arguments).ValueOrDie(); } +StatusOr> HloTestBase::ExecuteReplicated( + std::unique_ptr module, absl::Span arguments, + int64 num_replicas) { + HloRunner::ReplicatedExecuteOptions options; + options.num_replicas = num_replicas; + for (auto argument : arguments) { + options.arguments.push_back(argument); + } + return test_runner_.ExecuteReplicated(std::move(module), options); +} + StatusOr> HloTestBase::MakeReferenceModule( const HloModule& test_module, const std::function& reference_preprocessor) { diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h index 78bdd336e0..3c2bcbb5df 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.h +++ 
b/tensorflow/compiler/xla/tests/hlo_test_base.h @@ -173,6 +173,11 @@ class HloTestBase : public ::testing::Test { Literal ExecuteAndTransfer(std::unique_ptr module, absl::Span arguments); + // Executes the given module on multiple replicas. + StatusOr> ExecuteReplicated( + std::unique_ptr module, absl::Span arguments, + int64 num_replicas); + // Executes the given hlo module on two backends and compares results. // // 'arguments': the input of the hlo module. -- GitLab From 9670259488f1681987aa5001bc0144060f5b714d Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Wed, 13 Feb 2019 14:08:52 -0800 Subject: [PATCH 077/351] Correctly set layouts for resource variables. Previously if a resource variable has non-descending layout, the layout will not be set correctly and a default layout will be used. This changes fixes this issue by: - Add a xla shape field to represent on device shape and layout. - Set the above field when assigning to a resource variable. - When generating program, use the layout in the above field to set the layout of the program. PiperOrigin-RevId: 233826352 --- tensorflow/compiler/tf2xla/xla_compiler.cc | 24 +++-- .../compiler/tf2xla/xla_compiler_test.cc | 91 +++++++++++++++++++ tensorflow/compiler/tf2xla/xla_op_kernel.cc | 1 + tensorflow/compiler/tf2xla/xla_resource.h | 15 +++ 4 files changed, 122 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 0833264523..3221ec5b72 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -185,9 +185,10 @@ Status BuildComputation( std::vector elems; elems.reserve(retvals.size()); - // Keeps track of which retvals have layout to update. The first element is - // the output index, second element is the new layout. - std::vector> retval_to_update_layout; + // Keeps track of the layout of each retval. If a retval is not in this list, + // a descending layout is used. 
The first element is the output index, second + // element is the new layout. + std::vector> retval_index_and_layout; for (int i = 0; i < retvals.size(); ++i) { XlaCompiler::OutputDescription& output = (*outputs)[i]; const XlaExpression& retval = retvals[i]; @@ -216,7 +217,7 @@ Status BuildComputation( TF_ASSIGN_OR_RETURN(xla::Shape shape, shape_representation_fn( output.shape, output.type)); value = xla::Reshape(value, xla::AsInt64Slice(shape.dimensions())); - retval_to_update_layout.emplace_back(elems.size(), shape.layout()); + retval_index_and_layout.emplace_back(elems.size(), shape.layout()); } else if (it != retval_cores.end()) { // Apply the sharding to the output, if there is a core assignment. value = identity_op(value); @@ -289,6 +290,11 @@ Status BuildComputation( // Ensures the correct sharding is applied to the output. handle = identity_op(handle); + // Set layout of the retval to device representation layout. + if (resource->representation_shape().has_value()) { + retval_index_and_layout.emplace_back( + elems.size(), resource->representation_shape()->layout()); + } elems.push_back(handle); } } @@ -318,15 +324,15 @@ Status BuildComputation( computation->GetProgramShape()); *output_shape = program_shape.result(); // Update the output layout to the layout of retval. 
- for (auto& update : retval_to_update_layout) { + for (auto& index_and_layout : retval_index_and_layout) { if (!always_return_tuple && elems.size() == 1) { - *output_shape->mutable_layout() = update.second; + *output_shape->mutable_layout() = index_and_layout.second; continue; } - xla::Shape* output_sub_shape = - xla::ShapeUtil::GetMutableSubshape(output_shape, {update.first}); - *output_sub_shape->mutable_layout() = update.second; + xla::Shape* output_sub_shape = xla::ShapeUtil::GetMutableSubshape( + output_shape, {index_and_layout.first}); + *output_sub_shape->mutable_layout() = index_and_layout.second; } return Status::OK(); } diff --git a/tensorflow/compiler/tf2xla/xla_compiler_test.cc b/tensorflow/compiler/tf2xla/xla_compiler_test.cc index 492010f731..b31137867d 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler_test.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler_test.cc @@ -277,6 +277,97 @@ TEST_F(XlaCompilerTest, OutOfOrderGraph) { EXPECT_TRUE(xla::LiteralTestUtil::Equal(param0_literal, actual_literal)); } +// Tests that the compiler can correctly propagate the layout assigned by +// shape_representation_fn_ to return types. +TEST_F(XlaCompilerTest, HonorShapeRepresentationFnForRetVal) { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0); + auto var = ops::_Arg(scope.WithOpName("V"), DT_RESOURCE, 1); + // Adds an identity op around the resource to make sure identity ops propagate + // resources correctly. 
+ auto identity = ops::Identity(scope.WithOpName("VIdentity"), var); + auto write = ops::AssignAddVariableOp(scope, identity, a); + auto read = ops::ReadVariableOp( + scope.WithControlDependencies(std::vector{write}), var, + DT_INT32); + auto read_plus_one = ops::Add(scope, read, ops::Const(scope, 1)); + auto d = ops::_Retval(scope.WithOpName("D"), read_plus_one, 0); + std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + + // Builds a description of the arguments. + std::vector args(2); + args[0].kind = XlaCompiler::Argument::kParameter; + args[0].type = DT_INT32; + args[0].shape = TensorShape({2, 3}); + args[1].kind = XlaCompiler::Argument::kResource; + args[1].resource_kind = XlaResource::kVariable; + args[1].initialized = true; + args[1].type = DT_INT32; + args[1].shape = TensorShape({2, 3}); + + auto options = DefaultOptions(); + options.shape_representation_fn = + [](const TensorShape& shape, DataType dt) -> xla::StatusOr { + xla::Shape xla_shape; + TF_RETURN_IF_ERROR(TensorShapeToXLAShape(dt, shape, &xla_shape)); + *xla_shape.mutable_layout() = xla::LayoutUtil::MakeLayout({0, 1}); + return xla_shape; + }; + // Compiles the graph. + XlaCompiler compiler(options); + + XlaCompiler::CompilationResult result; + TF_ASSERT_OK(compiler.CompileGraph(XlaCompiler::CompileOptions(), "add", + std::move(graph), args, &result)); + xla::Shape transposed = + xla::ShapeUtil::MakeShapeWithLayout(xla::S32, {2, 3}, {0, 1}); + // Check that the return shapes are correctly tranposed. 
+ EXPECT_EQ(result.xla_output_shape, + xla::ShapeUtil::MakeTupleShape({transposed, transposed})); +} + +// The layout of resource variable shouldn't change after transpose +TEST_F(XlaCompilerTest, TransposeVariables) { + Scope scope = Scope::NewRootScope().ExitOnError(); + auto a = ops::_Arg(scope.WithOpName("A"), DT_INT32, 0); + auto var = ops::_Arg(scope.WithOpName("V"), DT_RESOURCE, 1); + // Adds an identity op around the resource to make sure identity ops propagate + // resources correctly. + auto identity = ops::Identity(scope.WithOpName("VIdentity"), var); + auto write = ops::AssignAddVariableOp(scope, identity, a); + auto read = ops::ReadVariableOp( + scope.WithControlDependencies(std::vector{write}), var, + DT_INT32); + auto transposed_read = ops::Transpose(scope, read, {1, 0}); + auto reshape = ops::Reshape(scope, transposed_read, {2, 3}); + auto d = ops::_Retval(scope.WithOpName("D"), reshape, 0); + std::unique_ptr graph(new Graph(OpRegistry::Global())); + TF_ASSERT_OK(scope.ToGraph(graph.get())); + + // Builds a description of the arguments. + std::vector args(2); + args[0].kind = XlaCompiler::Argument::kParameter; + args[0].type = DT_INT32; + args[0].shape = TensorShape({2, 3}); + args[1].kind = XlaCompiler::Argument::kResource; + args[1].resource_kind = XlaResource::kVariable; + args[1].initialized = true; + args[1].type = DT_INT32; + args[1].shape = TensorShape({2, 3}); + // Compiles the graph. + XlaCompiler compiler(DefaultOptions()); + + XlaCompiler::CompilationResult result; + TF_ASSERT_OK(compiler.CompileGraph(XlaCompiler::CompileOptions(), "transpose", + std::move(graph), args, &result)); + xla::Shape transposed = + xla::ShapeUtil::MakeShapeWithLayout(xla::S32, {2, 3}, {1, 0}); + // Check that the return shapes are correctly tranposed. + EXPECT_EQ(result.xla_output_shape, + xla::ShapeUtil::MakeTupleShape({transposed, transposed})); +} + // Tests that the compiler doesn't reorder the parameters. 
TEST_F(XlaCompilerTest, MixedOrderArguments) { for (bool swap_order : {false, true}) { diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index e36128831b..0c80b26dff 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -513,6 +513,7 @@ Status AssignVariableTensor(const Tensor& tensor, DataType type, handle = xla::Reshape(handle, xla::AsInt64Slice(representation_shape.dimensions())); } + variable->SetRepresentationShape(representation_shape); return variable->SetValue(handle); } diff --git a/tensorflow/compiler/tf2xla/xla_resource.h b/tensorflow/compiler/tf2xla/xla_resource.h index 736588bb8b..ab3a5bdd9b 100644 --- a/tensorflow/compiler/tf2xla/xla_resource.h +++ b/tensorflow/compiler/tf2xla/xla_resource.h @@ -86,6 +86,12 @@ class XlaResource { // variables have new values that need to be written back. const xla::XlaOp& initial_value() const { return initial_value_; } + // An xla shape that indicates how this resource variable is represented on + // device. + const absl::optional& representation_shape() const { + return representation_shape_; + } + // A variable is initialized if it has a value. bool initialized() const { return value_.valid(); } @@ -100,6 +106,11 @@ class XlaResource { // Sets the current value of the resource to an all-zero value. Status SetZeroValue(xla::XlaBuilder* builder); + // Sets the representational shape of the resource on device. + void SetRepresentationShape(const xla::Shape& shape) { + representation_shape_ = absl::make_optional(shape); + } + // Looks up the gradient for `source`, or creates it if it does not already // exist. The call target must be an initialized TensorArray resource. A // TensorArray can have multiple named gradients; see the operator @@ -160,6 +171,10 @@ class XlaResource { xla::XlaOp value_; xla::XlaOp initial_value_; + // An xla shape that indicates how this resource variable is represented on + // device. 
+ absl::optional representation_shape_; + int64 max_array_size_ = -1; bool tensor_array_multiple_writes_aggregate_ = false; -- GitLab From 4586a1064c425c13836b3a20d5d978e0b5b15e1e Mon Sep 17 00:00:00 2001 From: Brian Patton Date: Wed, 13 Feb 2019 14:12:05 -0800 Subject: [PATCH 078/351] Don't nest stop_gradient calls. Python 3.4 still took five arguments to ast.Call. Change from conditional on PY3 to try/except to accomodate that. PiperOrigin-RevId: 233826970 --- .../tools/compatibility/tf_upgrade_v2.py | 25 ++++++++++------ .../tools/compatibility/tf_upgrade_v2_test.py | 30 +++++++++++++++++++ 2 files changed, 46 insertions(+), 9 deletions(-) diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py index bffedcdb3b..f137901947 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py @@ -23,7 +23,6 @@ import functools import sys import pasta -import six from tensorflow.tools.compatibility import ast_edits from tensorflow.tools.compatibility import renames_v2 @@ -1570,11 +1569,18 @@ def _softmax_cross_entropy_with_logits_transformer( """Wrap labels argument with stop_gradients.""" def _wrap_label(parent, old_value): """Wrap labels with tf.stop_gradient.""" - if six.PY3: + already_stop_grad = (isinstance(old_value, ast.Call) and + isinstance(old_value.func, ast.Attribute) and + old_value.func.attr == "stop_gradient" and + isinstance(old_value.func.value, ast.Name) and + old_value.func.value.id == "tf") + if already_stop_grad: + return False + try: new_value = ast.Call( ast.Name(id="tf.stop_gradient", ctx=ast.Load()), [old_value], []) - else: + except TypeError: new_value = ast.Call( ast.Name(id="tf.stop_gradient", ctx=ast.Load()), [old_value], [], None, None) @@ -1582,16 +1588,17 @@ def _softmax_cross_entropy_with_logits_transformer( # This copies the prefix and suffix on old_value to new_value. 
pasta.ast_utils.replace_child(parent, old_value, new_value) ast.copy_location(new_value, old_value) + return True # Check if we have a labels keyword arg for karg in node.keywords: if karg.arg == "labels": - logs.append((ast_edits.INFO, node.lineno, node.col_offset, - "Changing labels arg of " - "tf.nn.softmax_cross_entropy_with_logits to " - "tf.stop_gradient(labels). Please check this " - "transformation.\n")) - _wrap_label(karg, karg.value) + if _wrap_label(karg, karg.value): + logs.append((ast_edits.INFO, node.lineno, node.col_offset, + "Changing labels arg of " + "tf.nn.softmax_cross_entropy_with_logits to " + "tf.stop_gradient(labels). Please check this " + "transformation.\n")) return node return node diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py index c78db3fdf6..236f35911b 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py @@ -818,6 +818,36 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map _, unused_report, unused_errors, new_text = self._upgrade(text) self.assertEqual(expected_text, new_text) + def testSoftMaxCrossEntropyWithLogitsDoesntNest(self): + text = ("tf.nn.softmax_cross_entropy_with_logits(" + "labels=tf.stop_gradient(labels), logits=logits, dim=2)") + expected_text = ( + "tf.nn.softmax_cross_entropy_with_logits(" + "labels=tf.stop_gradient(labels), logits=logits, axis=2)") + _, unused_report, unused_errors, new_text = self._upgrade(text) + self.assertEqual(new_text, expected_text) + + text = ("tf.nn.softmax_cross_entropy_with_logits(" + "labels=tf.stop_gradient(foo(bar)))") + expected_text = ("tf.nn.softmax_cross_entropy_with_logits(" + "labels=tf.stop_gradient(foo(bar)))") + _, unused_report, unused_errors, new_text = self._upgrade(text) + self.assertEqual(expected_text, new_text) + + text = ("tf.nn.softmax_cross_entropy_with_logits(" + "labels=foo())") + expected_text = 
("tf.nn.softmax_cross_entropy_with_logits(" + "labels=tf.stop_gradient(foo()))") + _, unused_report, unused_errors, new_text = self._upgrade(text) + self.assertEqual(expected_text, new_text) + + text = ("tf.nn.softmax_cross_entropy_with_logits(" + "labels=foo().zz())") + expected_text = ("tf.nn.softmax_cross_entropy_with_logits(" + "labels=tf.stop_gradient(foo().zz()))") + _, unused_report, unused_errors, new_text = self._upgrade(text) + self.assertEqual(expected_text, new_text) + def testSparseMatmul(self): text = ("tf.sparse_matmul(a, b, c, d, e, f, g)\n") expected_text = ("tf.linalg.matmul(a=a, b=b, transpose_a=c, transpose_b=d, " -- GitLab From 186e958b99dabe61fc74cd2d5d761d0b916aab14 Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Wed, 13 Feb 2019 14:14:28 -0800 Subject: [PATCH 079/351] Introduce a calibration flag that allows to run calibration with quantization. PiperOrigin-RevId: 233827492 --- tensorflow/lite/python/BUILD | 1 + tensorflow/lite/python/lite.py | 125 +++++++++++++++++- tensorflow/lite/python/lite_test.py | 72 +++++++++- .../golden/v1/tensorflow.lite.-optimize.pbtxt | 12 ++ ...sorflow.lite.-representative-dataset.pbtxt | 9 ++ .../tools/api/golden/v1/tensorflow.lite.pbtxt | 8 ++ .../golden/v2/tensorflow.lite.-optimize.pbtxt | 12 ++ ...sorflow.lite.-representative-dataset.pbtxt | 9 ++ .../tools/api/golden/v2/tensorflow.lite.pbtxt | 8 ++ 9 files changed, 248 insertions(+), 8 deletions(-) create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.lite.-optimize.pbtxt create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.lite.-representative-dataset.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.lite.-optimize.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.lite.-representative-dataset.pbtxt diff --git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index 02b8b80be9..6e1f5adc7e 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -63,6 +63,7 @@ py_library( 
":interpreter", ":lite_constants", ":op_hint", + "//tensorflow/lite/python/optimize:calibrator", "//tensorflow/python:graph_util", "//tensorflow/python:tf_optimizer", "//tensorflow/python/keras", diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index 3b0aa02b7c..40efbe5392 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -32,7 +32,8 @@ EXPERIMENTAL: APIs here are unstable and likely to change without notice. from __future__ import absolute_import from __future__ import division from __future__ import print_function - +import warnings +import enum from six import PY3 from google.protobuf import text_format as _text_format @@ -52,6 +53,7 @@ from tensorflow.lite.python.convert_saved_model import set_tensor_shapes as _set from tensorflow.lite.python.interpreter import Interpreter # pylint: disable=unused-import from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs # pylint: disable=unused-import from tensorflow.lite.python.op_hint import OpHint # pylint: disable=unused-import +from tensorflow.lite.python.optimize import calibrator as _calibrator from tensorflow.core.framework import graph_pb2 as _graph_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 as _rewriter_config_pb2 from tensorflow.core.protobuf import config_pb2 as _config_pb2 @@ -100,6 +102,59 @@ def _run_graph_optimizations(graph_def, input_arrays, output_arrays): return _tf_optimizer.OptimizeGraph(config, meta_graph) +@_tf_export("lite.Optimize") +class Optimize(enum.Enum): + """Enum defining the optimizations to apply when generating tflite graphs. + + Some optimizations may come at the cost of accuracy. + """ + + # Optimize for size. + # + # Optimizations that reduce the size of the model. + # The model size will be reduced. Optimizations can include quantizing the + # weights of the floating point model. + OPTIMIZE_FOR_SIZE = "OPTIMIZE_FOR_SIZE" + + # Optimize for latency. 
+ # + # Optimizations that reduce the latency of the model. + # The model latency will be reduced. Optimizations can include quantizing the + # weights of the floating point model. + OPTIMIZE_FOR_LATENCY = "OPTIMIZE_FOR_LATENCY" + + def __str__(self): + return self.value + + +@_tf_export("lite.RepresentativeDataset") +class RepresentativeDataset(object): + """Representative dataset to evaluate optimizations. + + A representative dataset that can be used to evaluate optimizations by the + converter. E.g. converter can use these examples to estimate (min, max) ranges + by calibrating the model on inputs. This can allow converter to quantize a + converted floating point model. + """ + + def __init__(self, input_gen, output_gen=None): + """Creates a representative dataset. + + Args: + input_gen: an input generator that can be used to generate input samples + for the model. This must be a callable object that returns an object + that supports the `iter()` protocol (e.g. a generator function). The + elements generated must have same type and shape as inputs to the model. + output_gen: (optional) an output generator that can be used to generate + output samples for the model. This must be a callable object that + returns an object that supports the `iter()` protocol (e.g. a generator + function). The elements generated must have same type and shape as + outputs to the model. (default None) + """ + self.input_gen = input_gen + self.output_gen = output_gen + + @_tf_export("lite.TFLiteConverter") class TFLiteConverter(object): """Convert a TensorFlow model into `output_format` using TOCO. @@ -141,10 +196,11 @@ class TFLiteConverter(object): created for any op that is unknown. The developer will need to provide these to the TensorFlow Lite runtime with a custom resolver. (default False) - post_training_quantize: Boolean indicating whether to quantize the weights - of the converted float model. 
Model size will be reduced and there will be - latency improvements (at the cost of accuracy). - (default False) + post_training_quantize: deprecated, please specify + `[optimize.OPTIMIZE_FOR_SIZE]` for `optimizations` instead. Boolean + indicating whether to quantize the weights of the converted float model. + Model size will be reduced and there will be latency improvements + (at the cost of accuracy). (default False) dump_graphviz_dir: Full filepath of folder to dump the graphs at various stages of processing GraphViz .dot files. Preferred over --output_format=GRAPHVIZ_DOT in order to keep the requirements of the @@ -154,6 +210,16 @@ class TFLiteConverter(object): target_ops: Experimental flag, subject to change. Set of OpsSet options indicating which converter to use. (default set([OpsSet.TFLITE_BUILTINS])) + optimizations: Experimental flag, subject to change, A list of + optimizations to apply when converting the model. The converter applies + the optimizations by giving priority to the optimizations specified + earlier in the list. E.g. + `[optimize.OPTIMIZE_FOR_SIZE, optimize.OPTIMIZE_FOR_LATENCY]` requires + the converter to do both size and latency optimizations giving priority + to size optimizations over latency optimizations. + representative_dataset: a representative dataset that can be used to + generate input and output samples for the model. The converter can use + the dataset to evaluate different optimizations. Example usage: @@ -216,10 +282,12 @@ class TFLiteConverter(object): self.reorder_across_fake_quant = False self.change_concat_input_ranges = False self.allow_custom_ops = False - self.post_training_quantize = False + self._post_training_quantize = False self.dump_graphviz_dir = None self.dump_graphviz_video = False self.target_ops = set([OpsSet.TFLITE_BUILTINS]) + self.representative_dataset = None + self.optimizations = [] # Attributes are used by models that cannot be loaded into TensorFlow. 
if not self._has_valid_tensors(): @@ -419,6 +487,25 @@ class TFLiteConverter(object): graph_def = _freeze_graph(sess, output_tensors) return cls(graph_def, input_tensors, output_tensors) + def __setattr__(self, name, value): + if name == "post_training_quantize": + warnings.warn("Property %s is deprecated, " + "please use set_converter_mode instead." % name) + if value: + # Use OPTIMIZE_FOR_SIZE for post training for now. + self.optimizations = [Optimize.OPTIMIZE_FOR_SIZE] + else: + self.optimizations = [] + return + object.__setattr__(self, name, value) + + def __getattribute__(self, name): + if name == "post_training_quantize": + warnings.warn("Property %s is deprecated, " + "please use get_converter_mode instead." % name) + return Optimize.OPTIMIZE_FOR_SIZE in set(self.optimizations) + return object.__getattribute__(self, name) + def convert(self): """Converts a TensorFlow GraphDef based on instance variables. @@ -463,6 +550,24 @@ class TFLiteConverter(object): "tensors '{0}'.".format(",".join(invalid_stats))) else: quantized_stats = None + if self.representative_dataset: + if not isinstance(self.representative_dataset, RepresentativeDataset): + raise TypeError( + "representative_dataset must be an instance of " + "RepresentativeDataset") + if self.representative_dataset.input_gen is None: + raise ValueError( + "Provide an input generator for representative_dataset") + + # TODO(shashishekhar): For now use optimizations order is ignored. + # Both size and latency optimizations decide whether to apply post + # training optimizations. + post_training_optimize = bool( + len(set(self.optimizations) & set([Optimize.OPTIMIZE_FOR_LATENCY, + Optimize.OPTIMIZE_FOR_SIZE]))) + # Do weights only quantization if there is no dataset for calibration. 
+ weights_only_quantize_flag = ( + post_training_optimize and (self.representative_dataset is None)) converter_kwargs = { "inference_type": self.inference_type, @@ -475,7 +580,7 @@ class TFLiteConverter(object): "reorder_across_fake_quant": self.reorder_across_fake_quant, "change_concat_input_ranges": self.change_concat_input_ranges, "allow_custom_ops": self.allow_custom_ops, - "post_training_quantize": self.post_training_quantize, + "post_training_quantize": weights_only_quantize_flag, "target_ops": self.target_ops, "dump_graphviz_dir": self.dump_graphviz_dir, "dump_graphviz_video": self.dump_graphviz_video @@ -504,6 +609,12 @@ class TFLiteConverter(object): input_arrays_with_shape=self._input_arrays_with_shape, output_arrays=self._output_arrays, **converter_kwargs) + + if self.representative_dataset and post_training_optimize: + calibrate_quantize = _calibrator.Calibrator(result) + result = calibrate_quantize.calibrate_and_quantize( + self.representative_dataset.input_gen) + return result def get_input_arrays(self): diff --git a/tensorflow/lite/python/lite_test.py b/tensorflow/lite/python/lite_test.py index ca6c5b8f13..d41b7a75fd 100644 --- a/tensorflow/lite/python/lite_test.py +++ b/tensorflow/lite/python/lite_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops.variables import global_variables_initializer as _global_variables_initializer from tensorflow.python.platform import gfile @@ -481,6 +482,29 @@ class FromSessionTest(test_util.TensorFlowTestCase): self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all()) self.assertTrue(output_details[0]['quantization'][0] > 0) # scale + def testPostTrainingQuantizeDeprecatedAttribute(self): + in_tensor_1 = array_ops.placeholder( + 
shape=[33, 33], dtype=dtypes.float32, name='inputA') + in_tensor_2 = constant_op.constant( + np.random.uniform(low=-10., high=10., size=(33, 33)), + shape=[33, 33], + dtype=dtypes.float32, + name='inputB') + out_tensor = math_ops.matmul(in_tensor_1, in_tensor_2, name='output') + sess = session.Session() + + quantized_converter = lite.TFLiteConverter.from_session( + sess, [in_tensor_1], [out_tensor]) + self.assertFalse(quantized_converter.post_training_quantize) + + quantized_converter.post_training_quantize = True + self.assertTrue(quantized_converter.post_training_quantize) + self.assertEqual(quantized_converter.optimizations, + [lite.Optimize.OPTIMIZE_FOR_SIZE]) + + quantized_tflite = quantized_converter.convert() + self.assertTrue(quantized_tflite) + def testPostTrainingQuantize(self): np.random.seed(0) # We need the tensor to have more than 1024 elements for quantize_weights @@ -504,7 +528,53 @@ class FromSessionTest(test_util.TensorFlowTestCase): # Convert quantized weights model. quantized_converter = lite.TFLiteConverter.from_session( sess, [in_tensor_1], [out_tensor]) - quantized_converter.post_training_quantize = True + quantized_converter.optimizations = [lite.Optimize.OPTIMIZE_FOR_SIZE] + quantized_tflite = quantized_converter.convert() + self.assertTrue(quantized_tflite) + + # Ensure that the quantized weights tflite model is smaller. + self.assertTrue(len(quantized_tflite) < len(float_tflite)) + + def testPostTrainingCalibrateAndQuantize(self): + np.random.seed(0) + # Create a mobilenet like model. 
+ output_channel = 16 + depth_multiplier = 1 + inp = array_ops.placeholder(dtype=dtypes.float32, shape=(1, 5, 5, 3)) + conv = nn_ops.conv2d( + inp, + filter=array_ops.zeros([3, 3, 3, output_channel]), + strides=[1, 1, 1, 1], + padding='SAME') + dconv = nn_ops.depthwise_conv2d_native( + conv, + filter=array_ops.zeros( + [16, 16, output_channel, output_channel * depth_multiplier]), + strides=[1, 1, 1, 1], + padding='SAME') + pool = nn_ops.pool( + dconv, window_shape=[2, 2], pooling_type='AVG', padding='SAME') + max_pool = nn_ops.pool( + pool, window_shape=[2, 2], pooling_type='MAX', padding='SAME') + output = nn_ops.softmax(max_pool) + + def calibration_gen(): + for _ in range(10): + yield np.random.uniform(-1, 1, size=(1, 5, 5, 3)).astype(np.float32) + + sess = session.Session() + + # Convert float model. + float_converter = lite.TFLiteConverter.from_session(sess, [inp], [output]) + float_tflite = float_converter.convert() + self.assertTrue(float_tflite) + + # Convert quantized weights model. 
+ quantized_converter = lite.TFLiteConverter.from_session( + sess, [inp], [output]) + quantized_converter.optimizations = [lite.Optimize.OPTIMIZE_FOR_SIZE] + quantized_converter.representative_dataset = lite.RepresentativeDataset( + calibration_gen) quantized_tflite = quantized_converter.convert() self.assertTrue(quantized_tflite) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.lite.-optimize.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.lite.-optimize.pbtxt new file mode 100644 index 0000000000..fedb5ee9fa --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.lite.-optimize.pbtxt @@ -0,0 +1,12 @@ +path: "tensorflow.lite.Optimize" +tf_class { + is_instance: "" + member { + name: "OPTIMIZE_FOR_LATENCY" + mtype: "" + } + member { + name: "OPTIMIZE_FOR_SIZE" + mtype: "" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.lite.-representative-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.lite.-representative-dataset.pbtxt new file mode 100644 index 0000000000..d14b69531d --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.lite.-representative-dataset.pbtxt @@ -0,0 +1,9 @@ +path: "tensorflow.lite.RepresentativeDataset" +tf_class { + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'input_gen\', \'output_gen\'], varargs=None, keywords=None, defaults=[\'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt index 154dd00821..18664f7acb 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.lite.pbtxt @@ -12,6 +12,14 @@ tf_module { name: "OpsSet" mtype: "" } + member { + name: "Optimize" + mtype: "" + } + member { + name: "RepresentativeDataset" + mtype: "" + } member { name: "TFLiteConverter" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.lite.-optimize.pbtxt 
b/tensorflow/tools/api/golden/v2/tensorflow.lite.-optimize.pbtxt new file mode 100644 index 0000000000..fedb5ee9fa --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.lite.-optimize.pbtxt @@ -0,0 +1,12 @@ +path: "tensorflow.lite.Optimize" +tf_class { + is_instance: "" + member { + name: "OPTIMIZE_FOR_LATENCY" + mtype: "" + } + member { + name: "OPTIMIZE_FOR_SIZE" + mtype: "" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.lite.-representative-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.lite.-representative-dataset.pbtxt new file mode 100644 index 0000000000..d14b69531d --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.lite.-representative-dataset.pbtxt @@ -0,0 +1,9 @@ +path: "tensorflow.lite.RepresentativeDataset" +tf_class { + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'input_gen\', \'output_gen\'], varargs=None, keywords=None, defaults=[\'None\'], " + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt index 154dd00821..18664f7acb 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.lite.pbtxt @@ -12,6 +12,14 @@ tf_module { name: "OpsSet" mtype: "" } + member { + name: "Optimize" + mtype: "" + } + member { + name: "RepresentativeDataset" + mtype: "" + } member { name: "TFLiteConverter" mtype: "" -- GitLab From f7c6c0b68e63aeec8469971750e2658520642a1f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 14:16:01 -0800 Subject: [PATCH 080/351] [TF:XLA] Enable parallel_for tests on XLA. One test does not test floor_div when running on XLA due to XLA's use of an Nvidia speed optimization for approximate division. This is a no-op for Tensorflow Classic. 
PiperOrigin-RevId: 233827805 --- tensorflow/python/ops/parallel_for/BUILD | 2 ++ tensorflow/python/ops/parallel_for/math_test.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/parallel_for/BUILD b/tensorflow/python/ops/parallel_for/BUILD index 05d2e4c7fc..b1e12b24b9 100644 --- a/tensorflow/python/ops/parallel_for/BUILD +++ b/tensorflow/python/ops/parallel_for/BUILD @@ -129,6 +129,7 @@ cuda_py_test( "//tensorflow/python:util", "//tensorflow/python/eager:backprop", ], + xla_enable_strict_auto_jit = True, ) cuda_py_test( @@ -143,6 +144,7 @@ cuda_py_test( "//tensorflow/python:util", ], tags = ["optonly"], # Too slow in non-opt mode + xla_enable_strict_auto_jit = True, ) py_library( diff --git a/tensorflow/python/ops/parallel_for/math_test.py b/tensorflow/python/ops/parallel_for/math_test.py index 7a5bef7229..8a081e194f 100644 --- a/tensorflow/python/ops/parallel_for/math_test.py +++ b/tensorflow/python/ops/parallel_for/math_test.py @@ -161,7 +161,6 @@ class MathTest(PForTestCase): math_ops.divide, math_ops.div_no_nan, math_ops.equal, - math_ops.floor_div, math_ops.floor_mod, math_ops.greater, math_ops.greater_equal, @@ -182,6 +181,10 @@ class MathTest(PForTestCase): safe_polygamma, safe_zeta, ] + # FloorDiv fails on XLA due floor's discontinuities exacerbating small + # division differences. + if not test_util.is_xla_enabled(): + float_ops += [math_ops.floor_div] for op in logical_ops + float_ops: x = random_ops.random_uniform([7, 3, 5]) y = random_ops.random_uniform([3, 5]) -- GitLab From d83aee43f1fbc4c6894fa3bf3fe0d0020afcbbd0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 15:32:59 -0800 Subject: [PATCH 081/351] Update ops-related pbtxt files. 
PiperOrigin-RevId: 233842801 --- .../core/ops/compat/ops_history.v1.pbtxt | 78 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 43 ++++++++++ 2 files changed, 121 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 8a6f41c88f..6f0a812992 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -12869,6 +12869,47 @@ op { } is_stateful: true } +op { + name: "CollectiveGather" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "data" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_HALF + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "group_size" + type: "int" + } + attr { + name: "group_key" + type: "int" + } + attr { + name: "instance_key" + type: "int" + } + attr { + name: "shape" + type: "shape" + } + is_stateful: true +} op { name: "CollectiveReduce" input_arg { @@ -19180,6 +19221,43 @@ op { } } } +op { + name: "DecodeRaw" + input_arg { + name: "bytes" + type: DT_STRING + } + output_arg { + name: "output" + type_attr: "out_type" + } + attr { + name: "out_type" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT16 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_INT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } + attr { + name: "little_endian" + type: "bool" + default_value { + b: true + } + } +} op { name: "DecodeWav" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index aad5048bc4..607e6ee86c 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -5437,6 +5437,47 @@ op { } is_stateful: true } +op { + name: "CollectiveGather" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "data" + type_attr: "T" + } + attr { + name: 
"T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_HALF + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "group_size" + type: "int" + } + attr { + name: "group_key" + type: "int" + } + attr { + name: "instance_key" + type: "int" + } + attr { + name: "shape" + type: "shape" + } + is_stateful: true +} op { name: "CollectiveReduce" input_arg { @@ -8882,6 +8923,8 @@ op { type: DT_INT16 type: DT_INT8 type: DT_INT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } -- GitLab From a4ff6790366f906926466d6486926b8e229e13ce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 15:33:22 -0800 Subject: [PATCH 082/351] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 233842881 --- tensorflow/go/op/wrappers.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 5ceff958ac..5e6dba04a7 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -4526,6 +4526,23 @@ func CollectiveBcastSend(scope *Scope, input tf.Output, group_size int64, group_ return op.Output(0) } +// Mutually accumulates multiple tensors of identical type and shape. +func CollectiveGather(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape} + opspec := tf.OpSpec{ + Type: "CollectiveGather", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // CollectiveReduceAttr is an optional argument to CollectiveReduce. 
type CollectiveReduceAttr func(optionalAttr) -- GitLab From e9129a758fd135c4fa19f95f40b70e6985774bfe Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Wed, 13 Feb 2019 15:48:36 -0800 Subject: [PATCH 083/351] [XLA] Simplify dot operations that have only batch and contracting dimension on the lhs or rhs. PiperOrigin-RevId: 233845773 --- .../xla/service/algebraic_simplifier.cc | 168 ++++++++++++++---- .../xla/service/algebraic_simplifier_test.cc | 46 +++-- .../compiler/xla/tests/dot_operation_test.cc | 2 + 3 files changed, 162 insertions(+), 54 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index c5deb74e96..9b037960cd 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -280,15 +280,51 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { hlo)); } - // Helper method to perform and add reduction in a single dimension. - HloInstruction* AddReduce(HloInstruction* hlo, int64 dim) { + // Converts to primitive type if the input hlo is not that type, otherwise + // returns the original hlo. + HloInstruction* AsType(HloInstruction* hlo, + const PrimitiveType element_type) { + if (hlo->shape().element_type() == element_type) { + return hlo; + } + return computation_->AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::ChangeElementType(hlo->shape(), element_type), hlo)); + } + + // Transposes a dot operand such that the batch dimensions are the msot major, + // and the contracting dimensions are most minor. 
+ StatusOr NormalizeDotOperandToBatchMajorAndContractingMinor( + HloInstruction* dot_operand, absl::Span batch_dimensions, + absl::Span contracting_dimensions) { + std::vector transpose_dimensions(batch_dimensions.begin(), + batch_dimensions.end()); + for (int64 i = 0; i < dot_operand->shape().rank(); ++i) { + if (!(absl::c_linear_search(batch_dimensions, i) || + absl::c_linear_search(contracting_dimensions, i))) { + transpose_dimensions.push_back(i); + } + } + transpose_dimensions.insert(transpose_dimensions.end(), + contracting_dimensions.begin(), + contracting_dimensions.end()); + return MakeTransposeHlo(dot_operand, transpose_dimensions); + } + + // Helper method to perform and add reduction on a list of dimensions. + HloInstruction* AddReduce(HloInstruction* hlo, absl::Span dims) { HloInstruction* zero = computation_->AddInstruction(HloInstruction::CreateConstant( LiteralUtil::Zero(hlo->shape().element_type()).Clone())); HloComputation* AddReduce_computation = GetOrCreateScalarAddComputation(); - Shape shape = ShapeUtil::DeleteDimension(dim, hlo->shape()); + Shape shape = ShapeUtil::FilterDimensions( + [&](int64 dim) { return !absl::c_linear_search(dims, dim); }, + hlo->shape()); return computation_->AddInstruction(HloInstruction::CreateReduce( - shape, hlo, zero, {dim}, AddReduce_computation)); + shape, hlo, zero, dims, AddReduce_computation)); + } + + HloInstruction* AddReduce(HloInstruction* hlo, int64 dim) { + return AddReduce(hlo, std::vector{dim}); } // Convenience method for replacing an instruction with a bitcast. 
If operand @@ -1120,16 +1156,8 @@ StatusOr AlgebraicSimplifierVisitor::HandleDotStrengthReduction( std::swap(rhs_collapsing_dim, rhs_kept_dim); } - auto as_type = [&](HloInstruction* hlo, const PrimitiveType element_type) { - if (hlo->shape().element_type() == element_type) { - return hlo; - } - return computation_->AddInstruction(HloInstruction::CreateConvert( - ShapeUtil::ChangeElementType(hlo->shape(), element_type), hlo)); - }; - auto reshape_if_necessary = [&](HloInstruction* hlo) { - hlo = as_type(hlo, dot->shape().element_type()); + hlo = AsType(hlo, dot->shape().element_type()); if (!ShapeUtil::SameDimensions(hlo->shape(), dot->shape())) { hlo = computation_->AddInstruction( HloInstruction::CreateReshape(dot->shape(), hlo)); @@ -1138,7 +1166,7 @@ StatusOr AlgebraicSimplifierVisitor::HandleDotStrengthReduction( }; auto add_reduce_in_f32 = [&](HloInstruction* hlo, const int64 dim) { - return AddReduce(as_type(hlo, F32), dim); + return AddReduce(AsType(hlo, F32), dim); }; auto broadcast = [&](HloInstruction* hlo, const Shape& shape, @@ -1247,8 +1275,8 @@ StatusOr AlgebraicSimplifierVisitor::HandleDotStrengthReduction( return dims; }; - // If the contracting dimension is 1, remove the degnerate dimnesions from the - // lhs and rhs, broadcast each to the result shape and multiply. + // If the contracting dimension is 1, remove the degnerate dimnensions from + // the lhs and rhs, broadcast each to the result shape and multiply. 
if (lhs->shape().dimensions(lhs_collapsing_dim) == 1 && (rhs_kept_dim == rhs_rank - 1 || (rhs_collapsing_dim == rhs_rank - 1 && rhs_kept_dim == rhs_rank - 2))) { @@ -1608,34 +1636,26 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { // If there are no contracting dimensions, a dot can be rewritten as // mul(broadcast(transpose(x)),broadcast(transpose(y))) if (dot->dot_dimension_numbers().lhs_contracting_dimensions_size() == 0) { - std::vector lhs_transpose( - dot->dot_dimension_numbers().lhs_batch_dimensions().begin(), - dot->dot_dimension_numbers().lhs_batch_dimensions().end()); - for (int64 i = 0; i < lhs->shape().rank(); ++i) { - if (!absl::c_linear_search( - dot->dot_dimension_numbers().lhs_batch_dimensions(), i)) { - lhs_transpose.push_back(i); - } - } - TF_ASSIGN_OR_RETURN(HloInstruction * new_lhs, - MakeTransposeHlo(lhs, lhs_transpose)); + TF_ASSIGN_OR_RETURN( + HloInstruction * new_lhs, + NormalizeDotOperandToBatchMajorAndContractingMinor( + lhs, + AsInt64Slice(dot->dot_dimension_numbers().lhs_batch_dimensions()), + AsInt64Slice( + dot->dot_dimension_numbers().lhs_contracting_dimensions()))); if (dot->shape().rank() != lhs->shape().rank()) { std::vector lhs_broadcast_dims(lhs->shape().rank()); absl::c_iota(lhs_broadcast_dims, 0); new_lhs = computation_->AddInstruction(HloInstruction::CreateBroadcast( dot->shape(), new_lhs, lhs_broadcast_dims)); } - std::vector rhs_transpose( - dot->dot_dimension_numbers().rhs_batch_dimensions().begin(), - dot->dot_dimension_numbers().rhs_batch_dimensions().end()); - for (int64 i = 0; i < rhs->shape().rank(); ++i) { - if (!absl::c_linear_search( - dot->dot_dimension_numbers().rhs_batch_dimensions(), i)) { - rhs_transpose.push_back(i); - } - } - TF_ASSIGN_OR_RETURN(HloInstruction * new_rhs, - MakeTransposeHlo(rhs, rhs_transpose)); + TF_ASSIGN_OR_RETURN( + HloInstruction * new_rhs, + NormalizeDotOperandToBatchMajorAndContractingMinor( + rhs, + 
AsInt64Slice(dot->dot_dimension_numbers().rhs_batch_dimensions()), + AsInt64Slice( + dot->dot_dimension_numbers().rhs_contracting_dimensions()))); if (dot->shape().rank() != rhs->shape().rank()) { std::vector rhs_broadcast_dims( dot->dot_dimension_numbers().lhs_batch_dimensions_size()); @@ -1651,6 +1671,78 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { new_lhs, new_rhs)); } + // If the lhs or rhs have only batch and contracting dimensions, a dot can be + // rewritten as reduce(mul(broadcast(transpose(x)),broadcast(transpose(y)))) + if ((dot->dot_dimension_numbers().lhs_batch_dimensions_size() + + dot->dot_dimension_numbers().lhs_contracting_dimensions_size() == + lhs->shape().rank()) || + (dot->dot_dimension_numbers().rhs_contracting_dimensions_size() + + dot->dot_dimension_numbers().rhs_batch_dimensions_size() == + rhs->shape().rank())) { + TF_ASSIGN_OR_RETURN( + HloInstruction * new_lhs, + NormalizeDotOperandToBatchMajorAndContractingMinor( + lhs, + AsInt64Slice(dot->dot_dimension_numbers().lhs_batch_dimensions()), + AsInt64Slice( + dot->dot_dimension_numbers().lhs_contracting_dimensions()))); + TF_ASSIGN_OR_RETURN( + HloInstruction * new_rhs, + NormalizeDotOperandToBatchMajorAndContractingMinor( + rhs, + AsInt64Slice(dot->dot_dimension_numbers().rhs_batch_dimensions()), + AsInt64Slice( + dot->dot_dimension_numbers().rhs_contracting_dimensions()))); + + int64 lhs_outer_dims = + lhs->shape().rank() - + (dot->dot_dimension_numbers().lhs_batch_dimensions_size() + + dot->dot_dimension_numbers().lhs_contracting_dimensions_size()); + int64 rhs_outer_dims = + rhs->shape().rank() - + (dot->dot_dimension_numbers().rhs_batch_dimensions_size() + + dot->dot_dimension_numbers().rhs_contracting_dimensions_size()); + CHECK(lhs_outer_dims == 0 || rhs_outer_dims == 0); + if (rhs_outer_dims > 0) { + std::vector lhs_broadcast_dims( + dot->dot_dimension_numbers().lhs_batch_dimensions_size()); + absl::c_iota(lhs_broadcast_dims, 0); + 
lhs_broadcast_dims.resize(lhs->shape().rank()); + std::iota(lhs_broadcast_dims.begin() + + dot->dot_dimension_numbers().lhs_batch_dimensions_size(), + lhs_broadcast_dims.end(), + dot->dot_dimension_numbers().lhs_batch_dimensions_size() + + rhs_outer_dims); + new_lhs = computation_->AddInstruction(HloInstruction::CreateBroadcast( + new_rhs->shape(), new_lhs, lhs_broadcast_dims)); + } else if (lhs_outer_dims > 0) { + std::vector rhs_broadcast_dims( + dot->dot_dimension_numbers().rhs_batch_dimensions_size()); + absl::c_iota(rhs_broadcast_dims, 0); + rhs_broadcast_dims.resize(rhs->shape().rank()); + std::iota(rhs_broadcast_dims.begin() + + dot->dot_dimension_numbers().rhs_batch_dimensions_size(), + rhs_broadcast_dims.end(), + dot->dot_dimension_numbers().rhs_batch_dimensions_size() + + lhs_outer_dims); + new_rhs = computation_->AddInstruction(HloInstruction::CreateBroadcast( + new_lhs->shape(), new_rhs, rhs_broadcast_dims)); + } + + TF_ASSIGN_OR_RETURN(HloInstruction * new_dot, + MakeBinaryHlo(HloOpcode::kMultiply, new_lhs, new_rhs)); + std::vector reduce_dims( + dot->dot_dimension_numbers().lhs_contracting_dimensions_size()); + new_dot = AsType(new_dot, F32); + const int64 outer_dims = std::max(rhs_outer_dims, lhs_outer_dims); + absl::c_iota( + reduce_dims, + outer_dims + dot->dot_dimension_numbers().lhs_batch_dimensions_size()); + new_dot = AddReduce(new_dot, reduce_dims); + new_dot = AsType(new_dot, dot->shape().element_type()); + return ReplaceInstruction(dot, new_dot); + } + if (lhs->shape().rank() > 2 || rhs->shape().rank() > 2 || dot->shape().rank() > 2) { if (options_.enable_dot_strength_reduction() && diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index feb6a0fb79..7743979e3f 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -3712,8 +3712,8 @@ TEST_F(AlgebraicSimplifierTest, 
IteratorInvalidation) { HloInstruction* y = builder.AddInstruction(HloInstruction::CreateParameter(1, r1f32, "y")); DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(0); + dot_dnums.add_lhs_batch_dimensions(0); + dot_dnums.add_rhs_batch_dimensions(0); builder.AddInstruction(HloInstruction::CreateDot(r1f32, x, y, dot_dnums, DefaultPrecisionConfig(2))); std::unique_ptr dot_computation(builder.Build()); @@ -4220,12 +4220,24 @@ TEST_P(BatchDotStrengthReductionTest, BatchDotStrengthReduction) { int m, k, n; PrimitiveType element_type; std::tie(m, k, n, element_type) = GetParam(); - - Shape dot_shape = ShapeUtil::MakeShape(element_type, {1, 3, 5, m, n}); - Shape lhs_shape = k > 0 ? ShapeUtil::MakeShape(element_type, {1, 3, 5, m, k}) - : ShapeUtil::MakeShape(element_type, {1, 3, 5, m}); - Shape rhs_shape = k > 0 ? ShapeUtil::MakeShape(element_type, {1, 3, 5, k, n}) - : ShapeUtil::MakeShape(element_type, {1, 3, 5, n}); + std::vector lhs_dims = {1, 3, 5}; + std::vector rhs_dims = lhs_dims; + std::vector output_dims = lhs_dims; + if (m > 0) { + lhs_dims.push_back(m); + output_dims.push_back(m); + } + if (k > 0) { + lhs_dims.push_back(k); + rhs_dims.push_back(k); + } + if (n > 0) { + rhs_dims.push_back(n); + output_dims.push_back(n); + } + Shape dot_shape = ShapeUtil::MakeShape(element_type, output_dims); + Shape lhs_shape = ShapeUtil::MakeShape(element_type, lhs_dims); + Shape rhs_shape = ShapeUtil::MakeShape(element_type, rhs_dims); HloComputation::Builder builder(TestName()); auto lhs = builder.AddInstruction( @@ -4240,7 +4252,7 @@ TEST_P(BatchDotStrengthReductionTest, BatchDotStrengthReduction) { dot_dnums.add_rhs_batch_dimensions(1); dot_dnums.add_rhs_batch_dimensions(2); if (k > 0) { - dot_dnums.add_lhs_contracting_dimensions(4); + dot_dnums.add_lhs_contracting_dimensions(m > 0 ? 
4 : 3); dot_dnums.add_rhs_contracting_dimensions(3); } builder.AddInstruction(HloInstruction::CreateDot( @@ -4248,9 +4260,9 @@ TEST_P(BatchDotStrengthReductionTest, BatchDotStrengthReduction) { auto computation = module->AddEntryComputation(builder.Build()); AlgebraicSimplifier simplifier(default_options_); TF_ASSERT_OK_AND_ASSIGN(bool changed, simplifier.Run(module.get())); - const bool dot_should_be_transformed = m == 1 || k == 1 || n == 1 || k == -1; - const bool computation_should_be_modified = dot_should_be_transformed; - EXPECT_EQ(changed, computation_should_be_modified); + const bool dot_should_be_transformed = + m == 1 || k == 1 || n == 1 || m == -1 || k == -1 || n == -1; + EXPECT_EQ(changed, dot_should_be_transformed); bool has_no_dot = true; for (const auto& hlo : computation->instructions()) { if (hlo->opcode() == HloOpcode::kDot) { @@ -4261,10 +4273,12 @@ TEST_P(BatchDotStrengthReductionTest, BatchDotStrengthReduction) { EXPECT_EQ(has_no_dot, dot_should_be_transformed); } -INSTANTIATE_TEST_SUITE_P( - BatchDotStrengthReductionTestInstantiation, BatchDotStrengthReductionTest, - ::testing::Combine(::testing::Values(1, 2), ::testing::Values(-1, 1, 2), - ::testing::Values(1, 2), ::testing::Values(F32, BF16))); +INSTANTIATE_TEST_SUITE_P(BatchDotStrengthReductionTestInstantiation, + BatchDotStrengthReductionTest, + ::testing::Combine(::testing::Values(-1, 1, 2), + ::testing::Values(-1, 1, 2), + ::testing::Values(-1, 1, 2), + ::testing::Values(F32, BF16))); class DotStrengthReductionTest : public AlgebraicSimplifierTest, diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 7a165e69f3..b97675b1b4 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -1188,6 +1188,8 @@ std::vector GetEinsumTestCases() { p{v{8, 55, 11, 3}, v{55, 11, 3, 29}, "mkBC,kBCn->BCnm"}, p{v{5, 6}, v{6, 7}, "ab,cd->dcba"}, p{v{6}, v{6, 7}, 
"b,bc->c"}, + p{v{5, 6, 7}, v{5, 6, 7}, "abc,abc->ab"}, + p{v{5, 6, 7}, v{7, 6, 5}, "abc,cba->ca"}, p{v{77}, v{77}, "a,a->a"}, p{v{77}, v{77, 55}, "a,ab->ba"}, p{v{2, 3, 77}, v{77, 2, 3, 55}, "ija,aijb->baij"}, -- GitLab From 6928a18f1b41ca6c0d1b34d15f5482e4668ad12a Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Wed, 13 Feb 2019 15:58:29 -0800 Subject: [PATCH 084/351] Int8 support for sub. PiperOrigin-RevId: 233847540 --- tensorflow/lite/kernels/register.cc | 4 +- tensorflow/lite/kernels/sub.cc | 72 ++++++++---- tensorflow/lite/kernels/sub_test.cc | 114 ++++++++++++------- tensorflow/lite/toco/tflite/operator.cc | 6 + tensorflow/lite/toco/tflite/operator_test.cc | 2 + 5 files changed, 139 insertions(+), 59 deletions(-) diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index d1689788f1..0a0ed49f88 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -256,7 +256,9 @@ BuiltinOpResolver::BuiltinOpResolver() { /* max_version */ 2); AddBuiltin(BuiltinOperator_MEAN, Register_MEAN()); AddBuiltin(BuiltinOperator_DIV, Register_DIV()); - AddBuiltin(BuiltinOperator_SUB, Register_SUB()); + AddBuiltin(BuiltinOperator_SUB, Register_SUB(), + /* min_version */ 1, + /* max_version */ 2); AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT(), /* min_version */ 1, /* max_version */ 3); AddBuiltin(BuiltinOperator_SPLIT_V, Register_SPLIT_V()); diff --git a/tensorflow/lite/kernels/sub.cc b/tensorflow/lite/kernels/sub.cc index 9144144e16..8bd6052307 100644 --- a/tensorflow/lite/kernels/sub.cc +++ b/tensorflow/lite/kernels/sub.cc @@ -12,10 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include #include "tensorflow/lite/c/builtin_op_data.h" #include "tensorflow/lite/c/c_api_internal.h" #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" #include "tensorflow/lite/kernels/internal/quantization_util.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h" #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" #include "tensorflow/lite/kernels/internal/tensor.h" #include "tensorflow/lite/kernels/kernel_util.h" @@ -68,21 +70,39 @@ void Free(TfLiteContext* context, void* buffer) { delete reinterpret_cast(buffer); } -TfLiteStatus PrepareUint8SubOp(TfLiteContext* context, - const TfLiteTensor* input_1, - const TfLiteTensor* input_2, - TfLiteTensor* output, TfLiteSubParams* params, - OpData* op_params, int op_sign) { +TfLiteStatus Prepare8BitSubOp(TfLiteContext* context, + const TfLiteTensor* input_1, + const TfLiteTensor* input_2, TfLiteTensor* output, + TfLiteSubParams* params, OpData* op_params, + int op_sign) { + TF_LITE_ENSURE(context, + output->type == kTfLiteUInt8 || output->type == kTfLiteInt8); const auto& input1_quantization_params = input_1->params; const auto& input2_quantization_params = input_2->params; const auto& output_quantization_params = output->params; + int32_t integer_type_min = 0; + int32_t integer_type_max = 0; + if (output->type == kTfLiteUInt8) { + integer_type_min = std::numeric_limits::min(); + integer_type_max = std::numeric_limits::max(); + } else { + // output->type == kTfLiteInt8 + integer_type_min = std::numeric_limits::min(); + integer_type_max = std::numeric_limits::max(); + } - TF_LITE_ENSURE(context, input1_quantization_params.zero_point >= 0); - TF_LITE_ENSURE(context, input1_quantization_params.zero_point <= 255); - TF_LITE_ENSURE(context, input2_quantization_params.zero_point >= 0); - TF_LITE_ENSURE(context, input2_quantization_params.zero_point <= 255); - TF_LITE_ENSURE(context, 
output_quantization_params.zero_point >= 0); - TF_LITE_ENSURE(context, output_quantization_params.zero_point <= 255); + TF_LITE_ENSURE(context, + input1_quantization_params.zero_point >= integer_type_min); + TF_LITE_ENSURE(context, + input1_quantization_params.zero_point <= integer_type_max); + TF_LITE_ENSURE(context, + input2_quantization_params.zero_point >= integer_type_min); + TF_LITE_ENSURE(context, + input2_quantization_params.zero_point <= integer_type_max); + TF_LITE_ENSURE(context, + output_quantization_params.zero_point >= integer_type_min); + TF_LITE_ENSURE(context, + output_quantization_params.zero_point <= integer_type_max); op_params->input1_offset = -input1_quantization_params.zero_point; op_params->input2_offset = -input2_quantization_params.zero_point; @@ -109,10 +129,15 @@ TfLiteStatus PrepareUint8SubOp(TfLiteContext* context, tflite::QuantizeMultiplierSmallerThanOneExp(real_output_multiplier, &op_params->output_multiplier, &op_params->output_shift); - - CalculateActivationRangeUint8(params->activation, output, - &op_params->output_activation_min, - &op_params->output_activation_max); + if (output->type == kTfLiteUInt8) { + CalculateActivationRangeUint8(params->activation, output, + &op_params->output_activation_min, + &op_params->output_activation_max); + } else { + CalculateActivationRangeInt8(params->activation, output, + &op_params->output_activation_min, + &op_params->output_activation_max); + } return kTfLiteOk; } @@ -186,9 +211,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { output_size = TfLiteIntArrayCopy(input1->dims); } - if (output->type == kTfLiteUInt8) { - TF_LITE_ENSURE_OK(context, PrepareUint8SubOp(context, input1, input2, - output, params, data, -1)); + if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) { + TF_LITE_ENSURE_OK(context, Prepare8BitSubOp(context, input1, input2, output, + params, data, -1)); } else if (output->type == kTfLiteInt16) { TF_LITE_ENSURE_OK(context, 
PrepareInt16SubOp(context, input1, input2, output, params, data)); @@ -271,9 +296,15 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, GetTensorData(input1), GetTensorShape(input2), \ GetTensorData(input2), GetTensorShape(output), \ GetTensorData(output)) - if (output->type == kTfLiteUInt8) { // NOTE: We are using the add kernels. This is possible as the second values // multiplier is negated before being passed down. + if (output->type == kTfLiteInt8) { + if (need_broadcast) { + TF_LITE_SUB(reference_integer_ops, BroadcastAdd4DSlow, int8_t); + } else { + TF_LITE_SUB(reference_integer_ops, Add, int8_t); + } + } else if (output->type == kTfLiteUInt8) { if (kernel_type == kReference) { if (need_broadcast) { TF_LITE_SUB(reference_ops, BroadcastAdd4DSlow, uint8_t); @@ -319,7 +350,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) { EvalSub(context, node, params, data, input1, input2, output); - } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) { + } else if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8 || + output->type == kTfLiteInt16) { EvalQuantized(context, node, params, data, input1, input2, output); } else { diff --git a/tensorflow/lite/kernels/sub_test.cc b/tensorflow/lite/kernels/sub_test.cc index 23927c6554..3c19678b20 100644 --- a/tensorflow/lite/kernels/sub_test.cc +++ b/tensorflow/lite/kernels/sub_test.cc @@ -63,9 +63,10 @@ class QuantizedSubOpModel : public BaseSubOpModel { public: using BaseSubOpModel::BaseSubOpModel; + template std::vector GetDequantizedOutput() { - return Dequantize(ExtractVector(output_), - GetScale(output_), GetZeroPoint(output_)); + return Dequantize(ExtractVector(output_), + GetScale(output_), GetZeroPoint(output_)); } std::vector GetDequantizedOutputInt16() { @@ -74,17 +75,15 @@ class QuantizedSubOpModel : public BaseSubOpModel { } }; -// for quantized Sub, the error shouldn't exceed 2*step +// for 
quantized Sub, the error shouldn't exceed step float GetTolerance(int min, int max) { float kQuantizedStep = (max - min) / 255.0; - float kQuantizedTolerance = 2.0 * kQuantizedStep; - return kQuantizedTolerance; + return kQuantizedStep; } float GetToleranceInt16(float min, float max) { float kQuantizedStep = (max - min) / std::numeric_limits::max(); - float kQuantizedTolerance = 2.0 * kQuantizedStep; - return kQuantizedTolerance; + return kQuantizedStep; } TEST(FloatSubOpModel, NoActivation) { @@ -194,7 +193,8 @@ TEST(IntegerSubOpModel, WithBroadcast) { } } -TEST(QuantizedSubOpModel, QuantizedTestsNoActivation) { +template +void QuantizedTestsNoActivation() { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::vector> inputs1 = { {0.1, 0.2, 0.3, 0.4}, {-0.2, 0.2, 0.4, 0.7}, {-0.01, 0.2, 0.7, 0.3}}; @@ -204,20 +204,30 @@ TEST(QuantizedSubOpModel, QuantizedTestsNoActivation) { {-0.8, -0.2, -0.1, 0.9}, {-0.61, -0.2, 0.88, -0.2}}; for (int i = 0; i < inputs1.size(); ++i) { - QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, - {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, - {TensorType_UINT8, {}, -1.0, 1.0}, + QuantizedSubOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0}, + {tensor_type, {1, 2, 2, 1}, -1.0, 1.0}, + {tensor_type, {}, -1.0, 1.0}, ActivationFunctionType_NONE); - m.QuantizeAndPopulate(m.input1(), inputs1[i]); - m.QuantizeAndPopulate(m.input2(), inputs2[i]); + m.QuantizeAndPopulate(m.input1(), inputs1[i]); + m.QuantizeAndPopulate(m.input2(), inputs2[i]); m.Invoke(); - EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( - results[i], kQuantizedTolerance))) + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance))) << "With test number " << i; } } -TEST(QuantizedSubOpModel, QuantizedTestsActivationRELU_N1_TO_1) { +TEST(QuantizedSubOpModel, QuantizedTestsNoActivationUInt8) { + QuantizedTestsNoActivation(); +} + +TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt8) 
{ + QuantizedTestsNoActivation(); +} + +template +void QuantizedTestsActivationRELU_N1_TO_1() { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); std::vector> inputs1 = {{-0.8, 0.2, 0.9, 0.7}, {-0.8, 0.2, 0.7, 0.5}}; @@ -226,57 +236,85 @@ TEST(QuantizedSubOpModel, QuantizedTestsActivationRELU_N1_TO_1) { std::vector> results = {{-1.0, -0.2, 0.0, 1.0}, {-1.0, -0.2, 1.0, 0.2}}; for (int i = 0; i < inputs1.size(); ++i) { - QuantizedSubOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, - {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, - {TensorType_UINT8, {}, -1.0, 1.0}, + QuantizedSubOpModel m({tensor_type, {1, 2, 2, 1}, -1.0, 1.0}, + {tensor_type, {1, 2, 2, 1}, -1.0, 1.0}, + {tensor_type, {}, -1.0, 1.0}, ActivationFunctionType_RELU_N1_TO_1); - m.QuantizeAndPopulate(m.input1(), inputs1[i]); - m.QuantizeAndPopulate(m.input2(), inputs2[i]); + m.QuantizeAndPopulate(m.input1(), inputs1[i]); + m.QuantizeAndPopulate(m.input2(), inputs2[i]); m.Invoke(); - EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( - results[i], kQuantizedTolerance))) + EXPECT_THAT( + m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear(results[i], kQuantizedTolerance))) << "With test number " << i; } } +TEST(QuantizedSubOpModel, QuantizedTestsActivationRELUN1TO1UInt8) { + QuantizedTestsActivationRELU_N1_TO_1(); +} -TEST(QuantizedSubOpModel, QuantizedVariousInputShapes) { +TEST(QuantizedSubOpModel, QuantizedTestsActivationRELUN1TO1Int8) { + QuantizedTestsActivationRELU_N1_TO_1(); +} + +template +void QuantizedVariousInputShapes() { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { - QuantizedSubOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, - {TensorType_UINT8, test_shapes[i], -3.0, 3.0}, - {TensorType_UINT8, {}, -3.0, 3.0}, + QuantizedSubOpModel m({tensor_type, test_shapes[i], -3.0, 3.0}, + {tensor_type, test_shapes[i], -3.0, 3.0}, + 
{tensor_type, {}, -3.0, 3.0}, ActivationFunctionType_NONE); - m.QuantizeAndPopulate(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); - m.QuantizeAndPopulate(m.input2(), {0.1, 0.3, 0.3, 0.5, 1.1, 0.1}); + m.QuantizeAndPopulate(m.input1(), + {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.QuantizeAndPopulate(m.input2(), + {0.1, 0.3, 0.3, 0.5, 1.1, 0.1}); m.Invoke(); - EXPECT_THAT(m.GetDequantizedOutput(), + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( {-2.1, -0.1, 0.4, 0.3, 0.0, 1.9}, kQuantizedTolerance))) << "With shape number " << i; } } -TEST(QuantizedSubOpModel, QuantizedWithBroadcast) { +TEST(QuantizedSubOpModel, QuantizedVariousInputShapesUInt8) { + QuantizedVariousInputShapes(); +} + +TEST(QuantizedSubOpModel, QuantizedVariousInputShapesInt8) { + QuantizedVariousInputShapes(); +} + +template +void QuantizedWithBroadcast() { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { - QuantizedSubOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, - {TensorType_UINT8, {}, -3.0, 3.0}, - {TensorType_UINT8, {}, -3.0, 3.0}, - ActivationFunctionType_NONE); - m.QuantizeAndPopulate(m.input1(), {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); - m.QuantizeAndPopulate(m.input2(), {0.7}); + QuantizedSubOpModel m( + {tensor_type, test_shapes[i], -3.0, 3.0}, {tensor_type, {}, -3.0, 3.0}, + {tensor_type, {}, -3.0, 3.0}, ActivationFunctionType_NONE); + m.QuantizeAndPopulate(m.input1(), + {-2.0, 0.2, 0.7, 0.8, 1.1, 2.0}); + m.QuantizeAndPopulate(m.input2(), {0.7}); m.Invoke(); - EXPECT_THAT(m.GetDequantizedOutput(), + EXPECT_THAT(m.GetDequantizedOutput(), ElementsAreArray(ArrayFloatNear( {-2.7, -0.5, 0.0, 0.1, 0.4, 1.3}, kQuantizedTolerance))) << "With shape number " << i; } } +TEST(QuantizedSubOpModel, QuantizedWithBroadcastUInt8) { + QuantizedWithBroadcast(); +} + +TEST(QuantizedSubOpModel, QuantizedWithBroadcastInt8) { + QuantizedWithBroadcast(); +} + 
TEST(QuantizedSubOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax = diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index 0a8ac0cca6..8192123cfc 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -276,6 +276,12 @@ class Sub : public BuiltinOperatorinputs[0]; + const Array& input_array = op_signature.model->GetArray(input_name); + // If the op take int8 input, it is version 2. + if (input_array.data_type == ArrayDataType::kInt8) { + return 2; + } return 1; } }; diff --git a/tensorflow/lite/toco/tflite/operator_test.cc b/tensorflow/lite/toco/tflite/operator_test.cc index 0c3eab3b3b..c8124f11c9 100644 --- a/tensorflow/lite/toco/tflite/operator_test.cc +++ b/tensorflow/lite/toco/tflite/operator_test.cc @@ -834,6 +834,8 @@ TEST_F(OperatorTest, VersioningSliceTest) { TEST_F(OperatorTest, VersioningAddTest) { SimpleVersioningTest(); } +TEST_F(OperatorTest, VersioningSubTest) { SimpleVersioningTest(); } + TEST_F(OperatorTest, VersioningSelectTest) { SelectOperator select_op; select_op.inputs = {"input1"}; -- GitLab From 889cb0ff8f2dcb63c7f5a28e1e8053ab406cfd15 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 13 Feb 2019 16:05:34 -0800 Subject: [PATCH 085/351] Fixing a large test PiperOrigin-RevId: 233849152 --- .../boosted_trees/estimator_batch/BUILD | 1 - .../estimator_batch/estimator_test.py | 169 ++---------------- 2 files changed, 11 insertions(+), 159 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD index 6a062489ee..64e4c4560b 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/BUILD +++ b/tensorflow/contrib/boosted_trees/estimator_batch/BUILD @@ -200,7 +200,6 @@ py_test( tags = [ "no_gpu", "no_pip_gpu", - "notap", # b/124385673 "notsan", ], deps = [ diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py index 47d910d42a..5a8b2ba9ca 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py @@ -399,8 +399,8 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): def testQuantileRegression(self): learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 - learner_config.constraints.max_tree_depth = 3 - learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE + learner_config.constraints.max_tree_depth = 6 + learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE @@ -413,7 +413,7 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): model_upper = estimator.GradientBoostedDecisionTreeQuantileRegressor( quantiles=[0.95], learner_config=learner_config, - num_trees=100, + num_trees=12, examples_per_layer=_QUANTILE_REGRESSION_SIZE, center_bias=False) @@ -428,31 +428,12 @@ class 
BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): self.assertTrue(frac_below_upper >= 0.92) self.assertTrue(frac_below_upper <= 0.98) - train_input_fn, test_input_fn, _ = _quantile_regression_input_fns() - model_lower = estimator.GradientBoostedDecisionTreeQuantileRegressor( - quantiles=[0.05], - learner_config=learner_config, - num_trees=100, - examples_per_layer=_QUANTILE_REGRESSION_SIZE, - center_bias=False) - - model_lower.fit(input_fn=train_input_fn, steps=1000) - result_iter = model_lower.predict(input_fn=test_input_fn) - lower = [] - for prediction_dict in result_iter: - lower.append(prediction_dict["scores"]) - - frac_above_lower = round(1. * np.count_nonzero(lower < y) / len(y), 3) - # +/- 3% - self.assertTrue(frac_above_lower >= 0.92) - self.assertTrue(frac_above_lower <= 0.98) - # Multi-dimensional quantile regression. def testQuantileRegressionMultiDimLabel(self): learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 - learner_config.constraints.max_tree_depth = 3 - learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE + learner_config.constraints.max_tree_depth = 6 + learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE @@ -467,7 +448,7 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): quantiles=[0.95], learner_config=learner_config, label_dimension=2, - num_trees=100, + num_trees=18, examples_per_layer=_QUANTILE_REGRESSION_SIZE, center_bias=False) @@ -490,35 +471,6 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): self.assertTrue(frac_both_below_upper >= 0.91) self.assertTrue(frac_both_below_upper <= 0.99) - train_input_fn, test_input_fn, _ = _quantile_regression_input_fns( - two_dimension=True) - model_lower = 
estimator.GradientBoostedDecisionTreeQuantileRegressor( - quantiles=[0.05], - learner_config=learner_config, - label_dimension=2, - num_trees=100, - examples_per_layer=_QUANTILE_REGRESSION_SIZE, - center_bias=False) - - model_lower.fit(input_fn=train_input_fn, steps=1000) - result_iter = model_lower.predict(input_fn=test_input_fn) - lower = [] - for prediction_dict in result_iter: - lower.append(prediction_dict["scores"]) - - count_above_lower = np.count_nonzero(lower < y, axis=0) - count_both_aboce_lower = np.count_nonzero(np.prod(lower < y, axis=1)) - frac_above_lower_0 = round(1. * count_above_lower[0] / len(y), 3) - frac_above_lower_1 = round(1. * count_above_lower[1] / len(y), 3) - frac_both_above_lower = round(1. * count_both_aboce_lower / len(y), 3) - # +/- 3% - self.assertTrue(frac_above_lower_0 >= 0.92) - self.assertTrue(frac_above_lower_0 <= 0.98) - self.assertTrue(frac_above_lower_1 >= 0.92) - self.assertTrue(frac_above_lower_1 <= 0.98) - self.assertTrue(frac_both_above_lower >= 0.91) - self.assertTrue(frac_both_above_lower <= 0.99) - class CoreGradientBoostedDecisionTreeEstimators(test_util.TensorFlowTestCase): @@ -712,11 +664,12 @@ class CoreGradientBoostedDecisionTreeEstimators(test_util.TensorFlowTestCase): est.evaluate(input_fn=input_fn, steps=1) est.predict(input_fn=input_fn) - # One dimensional quantile regression. - def testQuantileRegression(self): + # Quantile regression in core is the same as in non core estimator, so we + # just check that it does not fail. 
+ def testQuantileRegressionDoesNotThroughException(self): learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 - learner_config.constraints.max_tree_depth = 3 + learner_config.constraints.max_tree_depth = 1 learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE @@ -731,112 +684,12 @@ class CoreGradientBoostedDecisionTreeEstimators(test_util.TensorFlowTestCase): model_upper = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor( quantiles=[0.95], learner_config=learner_config, - num_trees=100, - examples_per_layer=_QUANTILE_REGRESSION_SIZE, - center_bias=False) - - model_upper.train(input_fn=train_input_fn, steps=1000) - result_iter = model_upper.predict(input_fn=test_input_fn) - upper = [] - for prediction_dict in result_iter: - upper.append(prediction_dict["predictions"]) - - frac_below_upper = round(1. * np.count_nonzero(upper > y) / len(y), 3) - # +/- 3% - self.assertTrue(frac_below_upper >= 0.92) - self.assertTrue(frac_below_upper <= 0.98) - - train_input_fn, test_input_fn, _ = _quantile_regression_input_fns() - model_lower = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor( - quantiles=[0.05], - learner_config=learner_config, - num_trees=100, - examples_per_layer=_QUANTILE_REGRESSION_SIZE, - center_bias=False) - - model_lower.train(input_fn=train_input_fn, steps=1000) - result_iter = model_lower.predict(input_fn=test_input_fn) - lower = [] - for prediction_dict in result_iter: - lower.append(prediction_dict["predictions"]) - - frac_above_lower = round(1. * np.count_nonzero(lower < y) / len(y), 3) - # +/- 3% - self.assertTrue(frac_above_lower >= 0.92) - self.assertTrue(frac_above_lower <= 0.98) - - # Multi-dimensional quantile regression. 
- def testQuantileRegressionMultiDimLabel(self): - learner_config = learner_pb2.LearnerConfig() - learner_config.num_classes = 2 - learner_config.constraints.max_tree_depth = 3 - learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE - learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE - learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE - learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE - learner_config.regularization.tree_complexity = ( - 1.0 / _QUANTILE_REGRESSION_SIZE) - - train_input_fn, test_input_fn, y = _quantile_regression_input_fns( - two_dimension=True) - y = y.reshape(_QUANTILE_REGRESSION_SIZE, 2) - - # 95% percentile. - model_upper = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor( - quantiles=[0.95], - learner_config=learner_config, - num_trees=100, - label_dimension=2, + num_trees=1, examples_per_layer=_QUANTILE_REGRESSION_SIZE, center_bias=False) model_upper.train(input_fn=train_input_fn, steps=1000) result_iter = model_upper.predict(input_fn=test_input_fn) - upper = [] - for prediction_dict in result_iter: - upper.append(prediction_dict["predictions"]) - - count_below_upper = np.count_nonzero(upper > y, axis=0) - count_both_below_upper = np.count_nonzero(np.prod(upper > y, axis=1)) - frac_below_upper_0 = round(1. * count_below_upper[0] / len(y), 3) - frac_below_upper_1 = round(1. * count_below_upper[1] / len(y), 3) - frac_both_below_upper = round(1. 
* count_both_below_upper / len(y), 3) - # +/- 3% - self.assertTrue(frac_below_upper_0 >= 0.92) - self.assertTrue(frac_below_upper_0 <= 0.98) - self.assertTrue(frac_below_upper_1 >= 0.92) - self.assertTrue(frac_below_upper_1 <= 0.98) - self.assertTrue(frac_both_below_upper >= 0.91) - self.assertTrue(frac_both_below_upper <= 0.99) - - train_input_fn, test_input_fn, _ = _quantile_regression_input_fns( - two_dimension=True) - model_lower = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor( - quantiles=[0.05], - learner_config=learner_config, - num_trees=100, - label_dimension=2, - examples_per_layer=_QUANTILE_REGRESSION_SIZE, - center_bias=False) - - model_lower.train(input_fn=train_input_fn, steps=1000) - result_iter = model_lower.predict(input_fn=test_input_fn) - lower = [] - for prediction_dict in result_iter: - lower.append(prediction_dict["predictions"]) - - count_above_lower = np.count_nonzero(lower < y, axis=0) - count_both_aboce_lower = np.count_nonzero(np.prod(lower < y, axis=1)) - frac_above_lower_0 = round(1. * count_above_lower[0] / len(y), 3) - frac_above_lower_1 = round(1. * count_above_lower[1] / len(y), 3) - frac_both_above_lower = round(1. * count_both_aboce_lower / len(y), 3) - # +/- 3% - self.assertTrue(frac_above_lower_0 >= 0.92) - self.assertTrue(frac_above_lower_0 <= 0.98) - self.assertTrue(frac_above_lower_1 >= 0.92) - self.assertTrue(frac_above_lower_1 <= 0.98) - self.assertTrue(frac_both_above_lower >= 0.91) - self.assertTrue(frac_both_above_lower <= 0.99) if __name__ == "__main__": -- GitLab From fa6fad43c7b12781ad42ddc9a6dfa9d741f9532f Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Wed, 13 Feb 2019 16:07:41 -0800 Subject: [PATCH 086/351] Create C++ API for memory stats ops. 
PiperOrigin-RevId: 233849594 --- tensorflow/contrib/memory_stats/BUILD | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tensorflow/contrib/memory_stats/BUILD b/tensorflow/contrib/memory_stats/BUILD index 63843b993c..93701249cc 100644 --- a/tensorflow/contrib/memory_stats/BUILD +++ b/tensorflow/contrib/memory_stats/BUILD @@ -10,6 +10,7 @@ package(default_visibility = ["//tensorflow:__subpackages__"]) load("//tensorflow:tensorflow.bzl", "tf_custom_op_library") load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") +load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_cc") load("//tensorflow:tensorflow.bzl", "tf_kernel_library") load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") @@ -45,6 +46,28 @@ tf_gen_op_wrapper_py( deps = [":memory_stats_ops_op_lib"], ) +tf_gen_op_wrapper_cc( + name = "memory_stats_ops", + out_ops_file = "memory_stats_ops", +) + +cc_library( + name = "memory_stats_cc", + srcs = ["memory_stats_ops.cc"], + hdrs = ["memory_stats_ops.h"], + visibility = ["//visibility:public"], + deps = [ + ":memory_stats_kernels", + ":memory_stats_ops_op_lib", + "//tensorflow/cc:const_op", + "//tensorflow/cc:ops", + "//tensorflow/cc:scope", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], + alwayslink = 1, +) + tf_custom_op_py_library( name = "memory_stats_py", srcs = [ -- GitLab From 5daa15b88603ec55aefed9cafe3caca135d5710e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 16:19:09 -0800 Subject: [PATCH 087/351] Making improvements to error messages: 1. Given tensorflow_estimator is now a new repo, we ignore those files too to determine user's files. 2. Instead of giving root level inputs, we now give users information about the immediate inputs. 
PiperOrigin-RevId: 233851856 --- .../python/framework/error_interpolation.py | 36 +++++++------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/framework/error_interpolation.py b/tensorflow/python/framework/error_interpolation.py index 7477ade8a9..b671dfbfaa 100644 --- a/tensorflow/python/framework/error_interpolation.py +++ b/tensorflow/python/framework/error_interpolation.py @@ -41,6 +41,8 @@ _ParseTag = collections.namedtuple("_ParseTag", ["type", "name"]) _BAD_FILE_SUBSTRINGS = [ os.path.join("tensorflow", "python"), os.path.join("tensorflow", "contrib"), + os.path.join("tensorflow_estimator", "python"), + os.path.join("tensorflow_estimator", "contrib"), " Date: Wed, 13 Feb 2019 16:19:44 -0800 Subject: [PATCH 088/351] Add new op schema for tf.where. PiperOrigin-RevId: 233851960 --- tensorflow/lite/builtin_ops.h | 1 + .../lite/core/api/flatbuffer_conversions.cc | 1 + tensorflow/lite/nnapi_delegate.cc | 1 + tensorflow/lite/schema/schema.fbs | 5 + tensorflow/lite/schema/schema_generated.h | 124 +++++++++++++++++- 5 files changed, 126 insertions(+), 6 deletions(-) diff --git a/tensorflow/lite/builtin_ops.h b/tensorflow/lite/builtin_ops.h index 331694c2f6..3a42a60cb8 100644 --- a/tensorflow/lite/builtin_ops.h +++ b/tensorflow/lite/builtin_ops.h @@ -134,6 +134,7 @@ typedef enum { kTfLiteBuiltinAddN = 106, kTfLiteBuiltinGatherNd = 107, kTfLiteBuiltinCos = 108, + kTfLiteBuiltinWhere = 109, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index 32b8cfecc4..0224836135 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -730,6 +730,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_REVERSE_V2: case BuiltinOperator_ADD_N: case BuiltinOperator_GATHER_ND: + case BuiltinOperator_WHERE: break; } return kTfLiteOk; 
diff --git a/tensorflow/lite/nnapi_delegate.cc b/tensorflow/lite/nnapi_delegate.cc index a5815d3a40..065ae52b5e 100644 --- a/tensorflow/lite/nnapi_delegate.cc +++ b/tensorflow/lite/nnapi_delegate.cc @@ -667,6 +667,7 @@ TfLiteStatus AddOpsAndParams( case tflite::BuiltinOperator_REVERSE_V2: case tflite::BuiltinOperator_ADD_N: case tflite::BuiltinOperator_GATHER_ND: + case tflite::BuiltinOperator_WHERE: logError("Op code %d is currently not delegated to NNAPI", builtin); return kTfLiteError; break; diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs index 19c144d709..cf4d40aec6 100644 --- a/tensorflow/lite/schema/schema.fbs +++ b/tensorflow/lite/schema/schema.fbs @@ -222,6 +222,7 @@ enum BuiltinOperator : byte { ADD_N = 106, GATHER_ND = 107, COS = 108, + WHERE = 109, } // Options for the builtin operators. @@ -310,6 +311,7 @@ union BuiltinOptions { AddNOptions, GatherNdOptions, CosOptions, + WhereOptions, } enum Padding : byte { SAME, VALID } @@ -739,6 +741,9 @@ table AddNOptions { table GatherNdOptions { } +table WhereOptions { +} + // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. 
table OperatorCode { diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h index 8734f4d706..4ddfc11e4c 100755 --- a/tensorflow/lite/schema/schema_generated.h +++ b/tensorflow/lite/schema/schema_generated.h @@ -283,6 +283,9 @@ struct AddNOptionsT; struct GatherNdOptions; struct GatherNdOptionsT; +struct WhereOptions; +struct WhereOptionsT; + struct OperatorCode; struct OperatorCodeT; @@ -541,11 +544,12 @@ enum BuiltinOperator { BuiltinOperator_ADD_N = 106, BuiltinOperator_GATHER_ND = 107, BuiltinOperator_COS = 108, + BuiltinOperator_WHERE = 109, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_COS + BuiltinOperator_MAX = BuiltinOperator_WHERE }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[108] { +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[109] { static const BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -654,7 +658,8 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[108] { BuiltinOperator_REVERSE_V2, BuiltinOperator_ADD_N, BuiltinOperator_GATHER_ND, - BuiltinOperator_COS + BuiltinOperator_COS, + BuiltinOperator_WHERE }; return values; } @@ -770,6 +775,7 @@ inline const char * const *EnumNamesBuiltinOperator() { "ADD_N", "GATHER_ND", "COS", + "WHERE", nullptr }; return names; @@ -866,11 +872,12 @@ enum BuiltinOptions { BuiltinOptions_AddNOptions = 82, BuiltinOptions_GatherNdOptions = 83, BuiltinOptions_CosOptions = 84, + BuiltinOptions_WhereOptions = 85, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_CosOptions + BuiltinOptions_MAX = BuiltinOptions_WhereOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[85] { +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[86] { static const BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -956,7 +963,8 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[85] { 
BuiltinOptions_ReverseV2Options, BuiltinOptions_AddNOptions, BuiltinOptions_GatherNdOptions, - BuiltinOptions_CosOptions + BuiltinOptions_CosOptions, + BuiltinOptions_WhereOptions }; return values; } @@ -1048,6 +1056,7 @@ inline const char * const *EnumNamesBuiltinOptions() { "AddNOptions", "GatherNdOptions", "CosOptions", + "WhereOptions", nullptr }; return names; @@ -1398,6 +1407,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_CosOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -2101,6 +2114,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_CosOptions ? reinterpret_cast(value) : nullptr; } + WhereOptionsT *AsWhereOptions() { + return type == BuiltinOptions_WhereOptions ? + reinterpret_cast(value) : nullptr; + } + const WhereOptionsT *AsWhereOptions() const { + return type == BuiltinOptions_WhereOptions ? 
+ reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -7357,6 +7378,46 @@ inline flatbuffers::Offset CreateGatherNdOptions( flatbuffers::Offset CreateGatherNdOptions(flatbuffers::FlatBufferBuilder &_fbb, const GatherNdOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct WhereOptionsT : public flatbuffers::NativeTable { + typedef WhereOptions TableType; + WhereOptionsT() { + } +}; + +struct WhereOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef WhereOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + WhereOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(WhereOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct WhereOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit WhereOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + WhereOptionsBuilder &operator=(const WhereOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateWhereOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + WhereOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; BuiltinOperator builtin_code; @@ -7742,6 +7803,9 @@ 
struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const CosOptions *builtin_options_as_CosOptions() const { return builtin_options_type() == BuiltinOptions_CosOptions ? static_cast(builtin_options()) : nullptr; } + const WhereOptions *builtin_options_as_WhereOptions() const { + return builtin_options_type() == BuiltinOptions_WhereOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -8109,6 +8173,10 @@ template<> inline const CosOptions *Operator::builtin_options_as() c return builtin_options_as_CosOptions(); } +template<> inline const WhereOptions *Operator::builtin_options_as() const { + return builtin_options_as_WhereOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -10848,6 +10916,29 @@ inline flatbuffers::Offset CreateGatherNdOptions(flatbuffers::F _fbb); } +inline WhereOptionsT *WhereOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new WhereOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void WhereOptions::UnPackTo(WhereOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset WhereOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateWhereOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateWhereOptions(flatbuffers::FlatBufferBuilder &_fbb, const WhereOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const WhereOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateWhereOptions( + _fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const 
flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); @@ -11442,6 +11533,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_WhereOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -11796,6 +11891,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_WhereOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -12138,6 +12237,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateCosOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_WhereOptions: { + auto ptr = reinterpret_cast(value); + return CreateWhereOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -12480,6 +12583,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new CosOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_WhereOptions: { + value = new WhereOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -12907,6 +13014,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_WhereOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; -- GitLab From 2269531caf76a1ac3f8c787e2904ea2ae6bbb9d6 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 13 Feb 2019 16:23:04 -0800 Subject: [PATCH 089/351] Complete the fix in cr/233681022 cr/233681022 canonicalized the types of the lhs, rhs and addend buffers. But we also need to canonicalize the type of the result buffer, which this CL does. 
PiperOrigin-RevId: 233852641 --- .../xla/service/cpu/tiled_dot_emitter.cc | 34 +++++++++++-------- .../compiler/xla/tests/dot_operation_test.cc | 29 ++++++++++++++-- 2 files changed, 47 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc b/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc index e54f205465..9fc472ff76 100644 --- a/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/tiled_dot_emitter.cc @@ -948,15 +948,16 @@ llvm::Type* GetPointerToElementType(llvm::Type* pointer_type) { return type->getPointerTo(); } -struct GemvInputsWithCanonicalType { +struct GemvBuffersWithCanonicalType { llvm::Value* lhs_canonicalized; llvm::Value* rhs_canonicalized; llvm::Value* addend_canonicalized; + llvm::Value* result_canonicalized; }; -GemvInputsWithCanonicalType GetGemvInputsWithCanonicalType( +GemvBuffersWithCanonicalType GetGemvBuffersWithCanonicalType( llvm::Value* lhs, llvm::Value* rhs, llvm::Value* addend, - llvm::IRBuilder<>* b) { + llvm::Value* result, llvm::IRBuilder<>* b) { // We characterize a GEMV operation via M and K, since N is implicitly 1. // This means the GEMV that multiplies (say) [5,6] with [6,1] is implemented // by the same GEMV that multiplies [5,6] with [1,6]. However, the @@ -965,20 +966,23 @@ GemvInputsWithCanonicalType GetGemvInputsWithCanonicalType( // from the `xla::Shape`s. Since we want to be able to call the same // `llvm::Function` for the two GEMVs we canonicalize the types of the GEMV // inputs here into the same type. - GemvInputsWithCanonicalType result; + GemvBuffersWithCanonicalType buffers_with_canonical_type; llvm::Type* lhs_type = lhs->getType(); llvm::Type* rhs_type = rhs->getType(); llvm::Type* addend_type = addend ? 
addend->getType() : nullptr; + llvm::Type* result_type = result->getType(); - result.lhs_canonicalized = + buffers_with_canonical_type.lhs_canonicalized = b->CreateBitCast(lhs, GetPointerToElementType(lhs_type)); - result.rhs_canonicalized = + buffers_with_canonical_type.rhs_canonicalized = b->CreateBitCast(rhs, GetPointerToElementType(rhs_type)); - result.addend_canonicalized = + buffers_with_canonical_type.addend_canonicalized = addend ? b->CreateBitCast(addend, GetPointerToElementType(addend_type)) : nullptr; + buffers_with_canonical_type.result_canonicalized = + b->CreateBitCast(result, GetPointerToElementType(result_type)); - return result; + return buffers_with_canonical_type; } } // namespace @@ -993,14 +997,15 @@ void EmitRowMajorGemv(PrimitiveType scalar_type, int64 tile_rows, /*tile_rows=*/tile_rows, /*tile_cols=*/tile_cols, /*m=*/m, /*k=*/k, /*has_addend=*/addend != nullptr); - GemvInputsWithCanonicalType canonical_inputs = - GetGemvInputsWithCanonicalType(lhs, rhs, addend, b); + GemvBuffersWithCanonicalType canonical_inputs = + GetGemvBuffersWithCanonicalType(lhs, rhs, addend, result, b); KernelSupportLibrary::EmitAndCallOutlinedKernel( /*enable_fast_math=*/enable_fast_math, /*optimize_for_size=*/optimize_for_size, b, config.GetCacheKey(), canonical_inputs.lhs_canonicalized, canonical_inputs.rhs_canonicalized, - canonical_inputs.addend_canonicalized, result, + canonical_inputs.addend_canonicalized, + canonical_inputs.result_canonicalized, [&config, b, &canonical_inputs](llvm::Value* lhs, llvm::Value* rhs, llvm::Value* addend, llvm::Value* result) { @@ -1020,14 +1025,15 @@ void EmitColumnMajorGemv(PrimitiveType scalar_type, int64 tile_rows, /*tile_rows=*/tile_rows, /*tile_cols=*/tile_cols, /*m=*/m, /*k=*/k, /*has_addend=*/addend != nullptr); - GemvInputsWithCanonicalType canonical_inputs = - GetGemvInputsWithCanonicalType(lhs, rhs, addend, b); + GemvBuffersWithCanonicalType canonical_inputs = + GetGemvBuffersWithCanonicalType(lhs, rhs, addend, result, 
b); KernelSupportLibrary::EmitAndCallOutlinedKernel( /*enable_fast_math=*/enable_fast_math, /*optimize_for_size=*/optimize_for_size, b, config.GetCacheKey(), canonical_inputs.lhs_canonicalized, canonical_inputs.rhs_canonicalized, - canonical_inputs.addend_canonicalized, result, + canonical_inputs.addend_canonicalized, + canonical_inputs.result_canonicalized, [&config, b, &canonical_inputs](llvm::Value* lhs, llvm::Value* rhs, llvm::Value* addend, llvm::Value* result) { diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index b97675b1b4..262b77264f 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -1267,11 +1267,11 @@ ENTRY %test { EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); } -XLA_TEST_F(DotOperationTextTest, CachingBug) { +XLA_TEST_F(DotOperationTextTest, CpuTiledDotEmitterCachingBug_1) { // Tests for a caching bug in the XLA CPU backend. absl::string_view hlo_string = R"( -HloModule CachingBug +HloModule CpuTiledDotEmitterCachingBug ENTRY main { lhs = f32[20,40] parameter(0) @@ -1288,5 +1288,30 @@ ENTRY main { EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); } +XLA_TEST_F(DotOperationTextTest, CpuTiledDotEmitterCachingBug_2) { + // Tests for a caching bug in the XLA CPU backend. 
+ absl::string_view hlo_string = + R"( +HloModule CpuTiledDotEmitterCachingBug + +ENTRY main { + lhs_0 = f32[20,40] parameter(0) + rhs_0 = f32[40,1] parameter(1) + lhs_1 = f32[1,40] parameter(2) + rhs_1 = f32[20,40] parameter(3) + + dot_0 = f32[20,1] dot(lhs_0, rhs_0), lhs_contracting_dims={1}, rhs_contracting_dims={0} + dot_1 = f32[1,20] dot(lhs_1, rhs_1), lhs_contracting_dims={1}, rhs_contracting_dims={1} + + dot_0_reshaped = f32[20] reshape(dot_0) + dot_1_reshaped = f32[20] reshape(dot_1) + + ROOT result = f32[20] divide(dot_0_reshaped, dot_1_reshaped) +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); +} + } // namespace } // namespace xla -- GitLab From cc83184cad45f5ccf55d501a5217377e8b5de044 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 13 Feb 2019 16:29:03 -0800 Subject: [PATCH 090/351] Remove legacy sampling ops (stateful) from auto control deps. This fixes @tf.function's inability to lift basic Variable initialization when there are other stateful ops above the location the variable is used in the function (e.g. tf.Assert). Also add better error information about which dependencies are preventing auto lifting. 
PiperOrigin-RevId: 233853786 --- tensorflow/python/BUILD | 1 + tensorflow/python/eager/def_function_test.py | 4 +- tensorflow/python/eager/lift_to_graph.py | 41 ++++++++++++- .../python/framework/auto_control_deps.py | 61 +++++++++++++++++-- .../framework/auto_control_deps_test.py | 26 ++++++++ 5 files changed, 124 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 79f4c68ac4..44e9540fbe 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -971,6 +971,7 @@ tf_py_test( additional_deps = [ ":auto_control_deps", ":client_testlib", + "//tensorflow/python/keras", ], ) diff --git a/tensorflow/python/eager/def_function_test.py b/tensorflow/python/eager/def_function_test.py index 462aa8aa0a..3d107d5952 100644 --- a/tensorflow/python/eager/def_function_test.py +++ b/tensorflow/python/eager/def_function_test.py @@ -212,7 +212,8 @@ class DefFunctionTest(test.TestCase): state.append(variables.Variable(2.0 * x)) return state[0] * x - with self.assertRaises(lift_to_graph.UnliftableError): + with self.assertRaisesRegexp( + lift_to_graph.UnliftableError, r'transitively.* mul .* x'): fn(constant_op.constant(3.0)) def testMethod(self): @@ -476,6 +477,7 @@ class DefFunctionTest(test.TestCase): with self.assertRaisesRegexp(ValueError, msg): func._decorate(lambda f: f) + if __name__ == '__main__': ops.enable_eager_execution() test.main() diff --git a/tensorflow/python/eager/lift_to_graph.py b/tensorflow/python/eager/lift_to_graph.py index e7c93481d3..2ed2d5882e 100644 --- a/tensorflow/python/eager/lift_to_graph.py +++ b/tensorflow/python/eager/lift_to_graph.py @@ -48,6 +48,41 @@ def _constant_inputs(op_or_tensor): for i in _graph_inputs(_as_operation(op_or_tensor))) +def _path_from(from_op, tensor, sources): + """Find one path from `from_op` to `tensor`, ignoring `sources`. + + Args: + from_op: A `tf.Operation`. + tensor: A `tf.Operation` or `tf.Tensor`. + sources: A list of `tf.Tensor`. 
+ + Returns: + A python string containing the path, or "??" if none is found. + """ + visited_ops = set([x.op for x in sources]) + ops_to_visit = [_as_operation(tensor)] + some_op_output = {} + while ops_to_visit: + op = ops_to_visit.pop() + if op in visited_ops: + continue + visited_ops.add(op) + if op == from_op: + path_op = op + path = [path_op] + final_op = _as_operation(tensor) + while path_op != final_op: + path_op = some_op_output[path_op] + path.append(path_op) + return " <- ".join(["%s (%s)" % (x.name, x.type) for x in reversed(path)]) + else: + for inp in _graph_inputs(op): + if inp not in visited_ops and inp not in sources: + some_op_output[inp] = op + ops_to_visit.append(inp) + return "??" + + def _map_subgraph(init_tensor, sources, disallowed_placeholders, visited_ops, op_outputs, add_sources): """Walk a Graph and capture the subgraph between init_tensor and sources. @@ -91,9 +126,9 @@ def _map_subgraph(init_tensor, sources, disallowed_placeholders, visited_ops, if should_raise: raise UnliftableError( - "Unable to lift tensor", init_tensor, - "because it depends transitively on placeholder ", op) - + "Unable to lift tensor %s because it depends transitively on " + "placeholder %s via at least one path, e.g.: %s" + % (repr(init_tensor), repr(op), _path_from(op, init_tensor, sources))) for inp in _graph_inputs(op): op_outputs[inp].add(op) if inp not in visited_ops and inp not in (sources or extra_sources): diff --git a/tensorflow/python/framework/auto_control_deps.py b/tensorflow/python/framework/auto_control_deps.py index 437c6abbff..f8f2ea5c07 100644 --- a/tensorflow/python/framework/auto_control_deps.py +++ b/tensorflow/python/framework/auto_control_deps.py @@ -39,6 +39,58 @@ ASYNC_STATEFUL_OPS = [ "NcclAllReduce", ] +LEGACY_RANDOM_OPS = [ + # These may be used in variable initializers -- thus their execution should + # not be dependent on other stateful operations. 
This is because although + # according to program order, tf.Variables may be created in sequence, + # their initialization happens outside of the program order (specifically, + # in graph mode their initialization happens by calling a grouped + # initializer operation or in eager mode, where initialization is lifted + # out of the tf.function and executed the first time the function is + # executed). + # + # Unless there is a specific dependency between the initializers + # themselves (e.g. one initializer depends on a Variable whose value depends + # on another initializer), the initialization can happen in any order so + # long as it's before the associated Variable read operations. + # + # Note that in general the randomness of legacy random operations is only + # guaranteed by providing a graph-level and op-level seed (and ordering of + # the same op across multiple iterations of a while_loop is specifically not + # guaranteed; see the discussion below). + # + # There is a possible race condition inside while_loop where the same + # random OpKernel instantiation is reused across multiple steps + # of the loop. Since legacy Random OpKernels have an internal rng state, + # automatic dependency tracking across loop steps would likely + # fix this race; and for that case this blacklist is problematic. + # However, since automatic dependency tracking inside while loops is not + # currently supported, and there are no other examples of OpKernel reuse + # (each OpKernel is associated with a unique op in graph mode), + # this blacklist has no effect on the aforementioned behavior. + # + # TODO(ebrevdo,skyewm): Modify the check against this blacklist to + # only occur when the op is inside a "variable initialization scope"; and + # add proper autodeps inside while_loops that respects this updated check. 
+ "RandomUniform", + "RandomUniformInt", + "RandomStandardNormal", + "ParameterizedTruncatedNormal", + "TruncatedNormal", + "RandomShuffle", + "Multinomial", + "RandomGamma", + "RandomGammaGrad", + "RandomPoisson", + "RandomPoissonV2", +] + +_ALL_BLACKLISTED_OPS = set(ASYNC_STATEFUL_OPS) | set(LEGACY_RANDOM_OPS) + + +def op_is_stateful(op_def): + return op_def.is_stateful and op_def.name not in _ALL_BLACKLISTED_OPS + class AutomaticControlDependencies(object): """Context manager to automatically add control dependencies. @@ -46,7 +98,7 @@ class AutomaticControlDependencies(object): Code under this context manager will act as if a sensible set of control dependencies were present. More specifically: 1. All stateful ops in the scope will execute (with the exception of ops in - ASYNC_STATEFUL_OPS) + ASYNC_STATEFUL_OPS and LEGACY_RANDOM_OPS) 2. Stateful ops which modify the same resource will execute in program order Note: creating variables in an automatic control dependencies context is not @@ -234,8 +286,7 @@ class AutomaticControlDependencies(object): control_inputs = set() # Ensure stateful ops run if (op.type not in self._graph._registered_ops # pylint: disable=protected-access - or (self._graph._registered_ops[op.type].is_stateful # pylint: disable=protected-access - and op.type not in ASYNC_STATEFUL_OPS)): + or op_is_stateful(self._graph._registered_ops[op.type])): # pylint: disable=protected-access ops_which_must_run.add(op) # Ignore switches (they're handled separately) if op.type == "Switch" and op.inputs[0].dtype == dtypes_module.resource: @@ -272,8 +323,8 @@ class AutomaticControlDependencies(object): if inp in merge_for_resource: merge_for_resource[inp]._add_control_input(op) # pylint: disable=protected-access last_op_using_resource_tensor[inp] = op - if (op.op_def.is_stateful and op.type not in ASYNC_STATEFUL_OPS - and not found_resource and op._control_flow_context is None): # pylint: disable=protected-access + if (op_is_stateful(op.op_def) and not 
found_resource + and op._control_flow_context is None): # pylint: disable=protected-access if None in last_op_using_resource_tensor: op._add_control_input(last_op_using_resource_tensor[None]) # pylint: disable=protected-access last_op_using_resource_tensor[None] = op diff --git a/tensorflow/python/framework/auto_control_deps_test.py b/tensorflow/python/framework/auto_control_deps_test.py index 2c25ab133b..d9df96f6d7 100644 --- a/tensorflow/python/framework/auto_control_deps_test.py +++ b/tensorflow/python/framework/auto_control_deps_test.py @@ -25,7 +25,9 @@ from tensorflow.python.framework import auto_control_deps as acd from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_spec from tensorflow.python.framework import test_util +from tensorflow.python.keras.layers import core as keras_core from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import resource_variable_ops @@ -296,6 +298,30 @@ class AutomaticControlDependenciesTest(test.TestCase): self.assertEqual(self.evaluate(outer()), 2.0) + def testVariableInitializersCanBeLifted(self): + # The initializer is a stateful op, but using it inside a function should + # *not* create additional dependencies. That's what we're testing. + layer = keras_core.Dense(1, kernel_initializer="glorot_uniform") + + @def_function.function + def fn(x): + # Stateful operation + control_flow_ops.Assert(x, ["Error"]) + # Variable initialization should be lifted. Prior to the change that + # added this test, the lifting would crash because of an auto control dep + # added on `x`. Note, the error did not happen if we + # manually created a tf.Variable outside of function and used it + # here. Alternatively, creating a tf.Variable inside fn() causes + # a different sort of error that is out of scope for this test. 
+ return layer(ops.convert_to_tensor([[1.0, 1.0]])) + + true = ops.convert_to_tensor(True) + + concrete = fn.get_concrete_function( + tensor_spec.TensorSpec(shape=(), dtype=dtypes.bool)) + self.evaluate(concrete(true)) + self.evaluate(fn(True)) + if __name__ == '__main__': ops.enable_eager_execution() -- GitLab From d98406b2d52abfa8010437da7456da24d68976e2 Mon Sep 17 00:00:00 2001 From: Katherine Wu Date: Wed, 13 Feb 2019 16:32:49 -0800 Subject: [PATCH 091/351] Remove unused class in export test file. PiperOrigin-RevId: 233854489 --- .../saved_model/model_utils/export_test.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/tensorflow/python/saved_model/model_utils/export_test.py b/tensorflow/python/saved_model/model_utils/export_test.py index ef512150a2..df9769f809 100644 --- a/tensorflow/python/saved_model/model_utils/export_test.py +++ b/tensorflow/python/saved_model/model_utils/export_test.py @@ -24,7 +24,6 @@ import time from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import test @@ -34,21 +33,6 @@ from tensorflow.python.saved_model.model_utils import export_output from tensorflow.python.saved_model.model_utils import export_utils -class LabeledTensorMock(object): - """Mock class emulating LabeledTensor.""" - - def __init__(self): - self.tensor = constant_op.constant([1]) - - -def _convert_labeled_tensor_mock_to_tensor(value, *args, **kwargs): - return ops.internal_convert_to_tensor(value.tensor, *args, **kwargs) - - -ops.register_tensor_conversion_function(LabeledTensorMock, - _convert_labeled_tensor_mock_to_tensor) - - class ExportTest(test_util.TensorFlowTestCase): @test_util.deprecated_graph_mode_only -- GitLab From 21a83324df6f33ccb9c276d4a28f83902d990899 Mon Sep 17 00:00:00 2001 From: Jeff Poznanovic Date: Wed, 13 Feb 
2019 18:04:49 -0700 Subject: [PATCH 092/351] Add no_rocm tag to self_adjoint_eig_op_test_gpu The kernel_tests:self_adjoint_eig_op_test_gpu unit test fails occasionally on ROCm. Disabling / tagging as "no_rocm" (and flaky). --- tensorflow/python/kernel_tests/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 5fb7357e10..308b4585c0 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -3291,7 +3291,10 @@ cuda_py_test( ], data = ["//tensorflow/python/kernel_tests/testdata:self_adjoint_eig_op_test_files"], shard_count = 20, - tags = ["no_windows"], + tags = [ + "no_rocm", # flaky test + "no_windows", + ], xla_enable_strict_auto_jit = True, ) -- GitLab From fe3aae72f596bd95baea5b5130c33647cbaaaebe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 16:50:04 -0800 Subject: [PATCH 093/351] Disable util_with_v1_optimizers_test failing on Windows with NCCL kernel error. PiperOrigin-RevId: 233857643 --- tensorflow/python/training/checkpointable/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/training/checkpointable/BUILD b/tensorflow/python/training/checkpointable/BUILD index e1f58a9e4b..a7ddd81258 100644 --- a/tensorflow/python/training/checkpointable/BUILD +++ b/tensorflow/python/training/checkpointable/BUILD @@ -247,5 +247,8 @@ tf_py_test( "//tensorflow/python/keras:engine", "//tensorflow/python/keras:layers", ], - tags = ["notsan"], # b/74395663 + tags = [ + "no_windows", # b/124401331 + "notsan", # b/74395663 + ], ) -- GitLab From 6c5cf861197b81e0dd650efe97235415c9f4e477 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 13 Feb 2019 16:58:03 -0800 Subject: [PATCH 094/351] n/a PiperOrigin-RevId: 233859067 --- tensorflow/examples/speech_commands/BUILD | 35 +++++++++++++++++++++++ tensorflow/lite/python/BUILD | 8 ++++++ tensorflow/python/tools/BUILD | 14 +++++++++ tensorflow/tools/test/BUILD | 8 ++++++ 4 files changed, 65 insertions(+) diff --git a/tensorflow/examples/speech_commands/BUILD b/tensorflow/examples/speech_commands/BUILD index ca044e57ed..88f7fe7faa 100644 --- a/tensorflow/examples/speech_commands/BUILD +++ b/tensorflow/examples/speech_commands/BUILD @@ -63,6 +63,13 @@ tf_py_test( py_binary( name = "train", + srcs = ["train.py"], + srcs_version = "PY2AND3", + deps = [":train_main_lib"], +) + +py_library( + name = "train_main_lib", srcs = [ "train.py", ], @@ -90,6 +97,13 @@ py_binary( name = "freeze", srcs = ["freeze.py"], srcs_version = "PY2AND3", + deps = [":freeze_main_lib"], +) + +py_library( + name = "freeze_main_lib", + srcs = ["freeze.py"], + srcs_version = "PY2AND3", deps = [":freeze_lib"], ) @@ -122,6 +136,13 @@ py_binary( name = "wav_to_features", srcs = ["wav_to_features.py"], srcs_version = "PY2AND3", + deps = [":wav_to_features_main_lib"], +) + +py_library( + name = "wav_to_features_main_lib", + srcs = ["wav_to_features.py"], + srcs_version = "PY2AND3", deps = [":wav_to_features_lib"], ) @@ -154,6 +175,13 @@ py_binary( name = "generate_streaming_test_wav", srcs = ["generate_streaming_test_wav.py"], srcs_version = "PY2AND3", + deps = [":generate_streaming_test_wav_main_lib"], +) + +py_library( + name = "generate_streaming_test_wav_main_lib", + srcs = ["generate_streaming_test_wav.py"], + srcs_version = "PY2AND3", deps = [":generate_streaming_test_wav_lib"], ) @@ -201,6 +229,13 @@ py_binary( name = "label_wav", srcs = ["label_wav.py"], srcs_version = "PY2AND3", + deps = [":label_wav_main_lib"], +) + +py_library( + name = "label_wav_main_lib", + srcs = ["label_wav.py"], + srcs_version = "PY2AND3", deps = [":label_wav_lib"], ) diff 
--git a/tensorflow/lite/python/BUILD b/tensorflow/lite/python/BUILD index 6e1f5adc7e..ceda5effed 100644 --- a/tensorflow/lite/python/BUILD +++ b/tensorflow/lite/python/BUILD @@ -39,6 +39,14 @@ py_binary( srcs = ["tflite_convert.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], + deps = [":tflite_convert_main_lib"], +) + +py_library( + name = "tflite_convert_main_lib", + srcs = ["tflite_convert.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], deps = [":tflite_convert_lib"], ) diff --git a/tensorflow/python/tools/BUILD b/tensorflow/python/tools/BUILD index f3db3b5952..e483155dcf 100644 --- a/tensorflow/python/tools/BUILD +++ b/tensorflow/python/tools/BUILD @@ -79,6 +79,13 @@ py_binary( name = "freeze_graph", srcs = ["freeze_graph.py"], srcs_version = "PY2AND3", + deps = [":freeze_graph_main_lib"], +) + +py_library( + name = "freeze_graph_main_lib", + srcs = ["freeze_graph.py"], + srcs_version = "PY2AND3", deps = [ ":freeze_graph_lib", ], @@ -209,6 +216,13 @@ py_binary( name = "optimize_for_inference", srcs = ["optimize_for_inference.py"], srcs_version = "PY2AND3", + deps = [":optimize_for_inference_main_lib"], +) + +py_library( + name = "optimize_for_inference_main_lib", + srcs = ["optimize_for_inference.py"], + srcs_version = "PY2AND3", deps = [ ":optimize_for_inference_lib", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/tools/test/BUILD b/tensorflow/tools/test/BUILD index 4b2026b947..ef12226ec0 100644 --- a/tensorflow/tools/test/BUILD +++ b/tensorflow/tools/test/BUILD @@ -57,6 +57,14 @@ py_binary( srcs = ["run_and_gather_logs.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], + deps = [":run_and_gather_logs_main_lib"], +) + +py_library( + name = "run_and_gather_logs_main_lib", + srcs = ["run_and_gather_logs.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], deps = [ ":run_and_gather_logs_lib", "//tensorflow/core:protos_all_py", -- GitLab From 
a83ccd34e729bb642232e63925e5e95e8cf65c1d Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 13 Feb 2019 16:58:28 -0800 Subject: [PATCH 095/351] Move TPU ops to TF core. PiperOrigin-RevId: 233859132 --- tensorflow/cc/BUILD | 19 ++ .../jit/encapsulate_subgraphs_pass_test.cc | 16 -- tensorflow/contrib/BUILD | 1 - tensorflow/contrib/tpu/BUILD | 152 +------------- tensorflow/contrib/tpu/python/ops/tpu_ops.py | 26 +-- .../tpu/python/ops/tpu_ordinal_selector_op.py | 5 +- .../contrib/tpu/python/tpu/functional.py | 20 +- .../contrib/tpu/python/tpu/tpu_embedding.py | 2 +- .../contrib/tpu/python/tpu/tpu_estimator.py | 3 +- tensorflow/core/BUILD | 34 ++++ .../api_def/base_api/api_def_AllToAll.pbtxt | 67 +++++++ .../base_api/api_def_CollectivePermute.pbtxt | 36 ++++ .../api_def_ConfigureDistributedTPU.pbtxt | 30 +++ .../base_api/api_def_CrossReplicaSum.pbtxt | 38 ++++ ..._def_EnqueueTPUEmbeddingIntegerBatch.pbtxt | 27 +++ ...i_def_EnqueueTPUEmbeddingSparseBatch.pbtxt | 65 ++++++ ...EnqueueTPUEmbeddingSparseTensorBatch.pbtxt | 74 +++++++ .../base_api/api_def_InfeedDequeue.pbtxt | 22 ++ .../base_api/api_def_InfeedDequeueTuple.pbtxt | 22 ++ .../base_api/api_def_InfeedEnqueue.pbtxt | 38 ++++ .../base_api/api_def_InfeedEnqueueTuple.pbtxt | 39 ++++ ...i_def_LoadTPUEmbeddingADAMParameters.pbtxt | 29 +++ ...mbeddingADAMParametersGradAccumDebug.pbtxt | 35 ++++ ...f_LoadTPUEmbeddingAdadeltaParameters.pbtxt | 29 +++ ...dingAdadeltaParametersGradAccumDebug.pbtxt | 35 ++++ ...ef_LoadTPUEmbeddingAdagradParameters.pbtxt | 23 +++ ...ddingAdagradParametersGradAccumDebug.pbtxt | 29 +++ ...PUEmbeddingCenteredRMSPropParameters.pbtxt | 35 ++++ ...i_def_LoadTPUEmbeddingFTRLParameters.pbtxt | 29 +++ ...mbeddingFTRLParametersGradAccumDebug.pbtxt | 35 ++++ ...PUEmbeddingMDLAdagradLightParameters.pbtxt | 35 ++++ ...f_LoadTPUEmbeddingMomentumParameters.pbtxt | 23 +++ ...dingMomentumParametersGradAccumDebug.pbtxt | 29 +++ ...PUEmbeddingProximalAdagradParameters.pbtxt | 23 +++ 
...ximalAdagradParametersGradAccumDebug.pbtxt | 29 +++ ...ef_LoadTPUEmbeddingRMSPropParameters.pbtxt | 29 +++ ...ddingRMSPropParametersGradAccumDebug.pbtxt | 35 ++++ ...gStochasticGradientDescentParameters.pbtxt | 17 ++ .../base_api/api_def_OutfeedDequeue.pbtxt | 33 +++ .../api_def_OutfeedDequeueTuple.pbtxt | 34 ++++ .../base_api/api_def_OutfeedEnqueue.pbtxt | 10 + .../api_def_OutfeedEnqueueTuple.pbtxt | 11 + .../api_def_RecvTPUEmbeddingActivations.pbtxt | 32 +++ ...f_RetrieveTPUEmbeddingADAMParameters.pbtxt | 28 +++ ...mbeddingADAMParametersGradAccumDebug.pbtxt | 34 ++++ ...trieveTPUEmbeddingAdadeltaParameters.pbtxt | 28 +++ ...dingAdadeltaParametersGradAccumDebug.pbtxt | 34 ++++ ...etrieveTPUEmbeddingAdagradParameters.pbtxt | 22 ++ ...ddingAdagradParametersGradAccumDebug.pbtxt | 28 +++ ...PUEmbeddingCenteredRMSPropParameters.pbtxt | 34 ++++ ...f_RetrieveTPUEmbeddingFTRLParameters.pbtxt | 28 +++ ...mbeddingFTRLParametersGradAccumDebug.pbtxt | 34 ++++ ...PUEmbeddingMDLAdagradLightParameters.pbtxt | 34 ++++ ...trieveTPUEmbeddingMomentumParameters.pbtxt | 22 ++ ...dingMomentumParametersGradAccumDebug.pbtxt | 28 +++ ...PUEmbeddingProximalAdagradParameters.pbtxt | 22 ++ ...ximalAdagradParametersGradAccumDebug.pbtxt | 28 +++ ...etrieveTPUEmbeddingRMSPropParameters.pbtxt | 28 +++ ...ddingRMSPropParametersGradAccumDebug.pbtxt | 34 ++++ ...gStochasticGradientDescentParameters.pbtxt | 16 ++ .../api_def_SendTPUEmbeddingGradients.pbtxt | 32 +++ .../api_def_ShutdownDistributedTPU.pbtxt | 7 + .../api_def_TPUCompilationResult.pbtxt | 4 + .../api_def_TPUEmbeddingActivations.pbtxt | 37 ++++ .../base_api/api_def_TPUOrdinalSelector.pbtxt | 15 ++ .../base_api/api_def_TPUPartitionedCall.pbtxt | 40 ++++ .../base_api/api_def_TPUReplicate.pbtxt | 99 +++++++++ .../api_def_TPUReplicateMetadata.pbtxt | 46 +++++ .../base_api/api_def_TPUReplicatedInput.pbtxt | 4 + .../api_def_TPUReplicatedOutput.pbtxt | 4 + .../base_api/api_def_WorkerHeartbeat.pbtxt | 20 ++ .../tpu => 
core}/ops/tpu_configuration_ops.cc | 19 +- .../ops/tpu_cross_replica_ops.cc} | 66 +----- .../tpu => core}/ops/tpu_embedding_ops.cc | 189 +----------------- .../ops/tpu_functional_ops.cc} | 0 .../ops/tpu_heartbeat_ops.cc} | 11 +- .../ops/tpu_host_compute_ops.cc} | 0 .../ops/tpu_infeed_ops.cc} | 50 +---- .../ops/tpu_ordinal_selector_ops.cc} | 11 +- .../ops/tpu_outfeed_ops.cc} | 42 +--- .../ops/tpu_replication_ops.cc} | 41 +--- .../{contrib/tpu/utils => core/tpu}/BUILD | 0 ...embedding_optimization_parameters_utils.cc | 2 +- ..._embedding_optimization_parameters_utils.h | 6 +- .../tpu}/tpu_embedding_output_layout_utils.cc | 2 +- .../tpu}/tpu_embedding_output_layout_utils.h | 6 +- tensorflow/python/BUILD | 20 ++ 87 files changed, 1999 insertions(+), 611 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_AllToAll.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_CollectivePermute.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ConfigureDistributedTPU.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_CrossReplicaSum.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingIntegerBatch.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseBatch.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseTensorBatch.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_InfeedDequeue.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_InfeedDequeueTuple.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_InfeedEnqueue.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_InfeedEnqueueTuple.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParametersGradAccumDebug.pbtxt create mode 100644 
tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdadeltaParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdadeltaParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdagradParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingAdagradParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingCenteredRMSPropParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingFTRLParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingFTRLParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMDLAdagradLightParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMomentumParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingMomentumParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingProximalAdagradParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingRMSPropParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingRMSPropParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingStochasticGradientDescentParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_OutfeedDequeue.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_OutfeedDequeueTuple.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_OutfeedEnqueue.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_OutfeedEnqueueTuple.pbtxt create mode 100644 
tensorflow/core/api_def/base_api/api_def_RecvTPUEmbeddingActivations.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingADAMParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingADAMParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdadeltaParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdagradParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingAdagradParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingCenteredRMSPropParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingFTRLParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingFTRLParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMDLAdagradLightParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMomentumParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingMomentumParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingProximalAdagradParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingRMSPropParameters.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_RetrieveTPUEmbeddingStochasticGradientDescentParameters.pbtxt create mode 
100644 tensorflow/core/api_def/base_api/api_def_SendTPUEmbeddingGradients.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ShutdownDistributedTPU.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUCompilationResult.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUEmbeddingActivations.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUOrdinalSelector.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUPartitionedCall.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUReplicate.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUReplicateMetadata.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUReplicatedInput.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TPUReplicatedOutput.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_WorkerHeartbeat.pbtxt rename tensorflow/{contrib/tpu => core}/ops/tpu_configuration_ops.cc (92%) rename tensorflow/{contrib/tpu/ops/cross_replica_ops.cc => core/ops/tpu_cross_replica_ops.cc} (54%) rename tensorflow/{contrib/tpu => core}/ops/tpu_embedding_ops.cc (63%) rename tensorflow/{contrib/tpu/ops/functional_ops.cc => core/ops/tpu_functional_ops.cc} (100%) rename tensorflow/{contrib/tpu/ops/heartbeat_ops.cc => core/ops/tpu_heartbeat_ops.cc} (72%) rename tensorflow/{contrib/tpu/ops/host_compute_ops.cc => core/ops/tpu_host_compute_ops.cc} (100%) rename tensorflow/{contrib/tpu/ops/infeed_ops.cc => core/ops/tpu_infeed_ops.cc} (51%) rename tensorflow/{contrib/tpu/ops/tpu_ordinal_selector_op.cc => core/ops/tpu_ordinal_selector_ops.cc} (80%) rename tensorflow/{contrib/tpu/ops/outfeed_ops.cc => core/ops/tpu_outfeed_ops.cc} (59%) rename tensorflow/{contrib/tpu/ops/replication_ops.cc => core/ops/tpu_replication_ops.cc} (69%) rename tensorflow/{contrib/tpu/utils => core/tpu}/BUILD (100%) rename tensorflow/{contrib/tpu/utils => 
core/tpu}/tpu_embedding_optimization_parameters_utils.cc (99%) rename tensorflow/{contrib/tpu/utils => core/tpu}/tpu_embedding_optimization_parameters_utils.h (93%) rename tensorflow/{contrib/tpu/utils => core/tpu}/tpu_embedding_output_layout_utils.cc (97%) rename tensorflow/{contrib/tpu/utils => core/tpu}/tpu_embedding_output_layout_utils.h (85%) diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index cf6d6050fa..4c4d587fce 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -587,6 +587,25 @@ tf_gen_op_wrappers_cc( pkg = "//tensorflow/core", ) +tf_gen_op_wrappers_cc( + name = "tpu_ops", + include_internal_ops = 1, + op_lib_names = [ + "tpu_configuration_ops", + "tpu_cross_replica_ops", + "tpu_embedding_ops", + "tpu_functional_ops", + "tpu_heartbeat_ops", + "tpu_host_compute_ops", + "tpu_infeed_ops", + "tpu_outfeed_ops", + "tpu_ordinal_selector_ops", + "tpu_replication_ops", + ], + pkg = "//tensorflow/core", + visibility = ["//tensorflow:internal"], +) + cc_library_with_android_deps( name = "cc_op_gen_main", srcs = [ diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc index 1f8ec09e19..261519de34 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc @@ -307,22 +307,6 @@ REGISTER_OP("XlaHostCompute") .Attr("shapes: list(shape) >= 0") .SetShapeFn(::tensorflow::shape_inference::UnknownShape); -REGISTER_OP("_XlaSendFromHost") - .Input("inputs: Tinputs") - .Input("dynamic_key: string") - .Attr("Tinputs: list(type) >= 0") - .Attr("key: string") - .Attr("device_ordinal: int") - .SetShapeFn(::tensorflow::shape_inference::UnknownShape); - -REGISTER_OP("_XlaRecvAtHost") - .Input("dynamic_key: string") - .Output("outputs: Toutputs") - .Attr("Toutputs: list(type) >= 0") - .Attr("key: string") - .Attr("device_ordinal: int") - .SetShapeFn(::tensorflow::shape_inference::UnknownShape); - 
REGISTER_OP("InputTest") .Output("o: float") .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 25f2640e35..0173b8bb06 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -218,7 +218,6 @@ cc_library( "//tensorflow/contrib/tensor_forest:stats_ops_op_lib", "//tensorflow/contrib/tensor_forest:tensor_forest_ops_op_lib", "//tensorflow/contrib/text:all_ops", - "//tensorflow/contrib/tpu:all_ops", ] + select({ "//tensorflow:android": [], "//tensorflow:ios": [], diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 294dbddcb5..d580ca6eb6 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -23,17 +23,13 @@ package( ], ) -cc_library( - name = "all_ops", +py_library( + name = "tpu_py", + srcs = ["python/ops/tpu_ops.py"], + srcs_version = "PY2AND3", deps = [ - ":cross_replica_ops_op_lib", - ":heartbeat_ops_op_lib", - ":host_compute_ops_op_lib", - ":infeed_ops_op_lib", - ":outfeed_ops_op_lib", - ":replication_ops_op_lib", - ":tpu_configuration_ops_op_lib", - ":tpu_embedding_ops_op_lib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:tpu_ops_gen", ], ) @@ -75,7 +71,6 @@ py_library( ":functional", ":tpu_embedding", ":tpu_lib", - ":tpu_ordinal_selector_py", "//tensorflow/contrib/training:training_py", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -98,122 +93,15 @@ py_library( ], ) -tf_gen_op_libs( - op_lib_names = [ - "cross_replica_ops", - "heartbeat_ops", - "host_compute_ops", - "infeed_ops", - "outfeed_ops", - "replication_ops", - "tpu_configuration_ops", - "tpu_embedding_ops", - "tpu_ordinal_selector_op", - "functional_ops", - ], - deps = [ - "//tensorflow/contrib/tpu/utils:tpu_embedding_optimization_parameters_utils", - "//tensorflow/contrib/tpu/utils:tpu_embedding_output_layout_utils", - "//tensorflow/core:lib", - "//tensorflow/core:lib_proto_parsing", - 
"//tensorflow/core:protos_all_cc", - "//tensorflow/core/protobuf/tpu:tpu_embedding_configuration_proto_cc", - ], -) - -tf_custom_op_library( - name = "python/ops/_tpu_ops.so", - srcs = [ - "ops/cross_replica_ops.cc", - "ops/heartbeat_ops.cc", - "ops/host_compute_ops.cc", - "ops/infeed_ops.cc", - "ops/outfeed_ops.cc", - "ops/replication_ops.cc", - "ops/tpu_configuration_ops.cc", - "ops/tpu_embedding_ops.cc", - ], - deps = [ - "//tensorflow/contrib/tpu/utils:tpu_embedding_optimization_parameters_utils", - "//tensorflow/contrib/tpu/utils:tpu_embedding_output_layout_utils", - "//tensorflow/core:lib_proto_parsing", - "//tensorflow/core/protobuf/tpu:tpu_embedding_configuration_proto_cc", - ], -) - -tf_gen_op_wrapper_py( - name = "tpu_ops", - hidden = [ - "SendTPUEmbeddingGradients", - "EnqueueTPUEmbeddingIntegerBatch", - "EnqueueTPUEmbeddingSparseBatch", - "EnqueueTPUEmbeddingSparseTensorBatch", - ], - deps = [ - ":cross_replica_ops_op_lib", - ":heartbeat_ops_op_lib", - ":host_compute_ops_op_lib", - ":infeed_ops_op_lib", - ":outfeed_ops_op_lib", - ":replication_ops_op_lib", - ":tpu_configuration_ops_op_lib", - ":tpu_embedding_ops_op_lib", - ], -) - -tf_custom_op_library( - name = "python/ops/_tpu_ordinal_selector_op.so", - srcs = ["ops/tpu_ordinal_selector_op.cc"], -) - -tf_custom_op_py_library( - name = "tpu_ordinal_selector_py", - srcs = ["python/ops/tpu_ordinal_selector_op.py"], - dso = [":python/ops/_tpu_ordinal_selector_op.so"], - kernels = [ - ":tpu_ordinal_selector_op_op_lib", - ], - srcs_version = "PY2AND3", - visibility = ["//visibility:public"], - deps = [ - ":tpu_ordinal_selector_op", - ], -) - -tf_gen_op_wrapper_py( - name = "tpu_ordinal_selector_op", - deps = [ - ":tpu_ordinal_selector_op_op_lib", - ], -) - -tf_custom_op_library( - name = "python/ops/_functional_ops.so", - srcs = ["ops/functional_ops.cc"], -) - -tf_gen_op_wrapper_py( - name = "gen_functional_ops", - out = "python/tpu/gen_functional_ops.py", - hidden = [ - "TPUPartitionedCall", - ], - deps = 
[":functional_ops_op_lib"], -) - -tf_custom_op_py_library( +py_library( name = "functional", srcs = ["python/tpu/functional.py"], - dso = [":python/ops/_functional_ops.so"], - kernels = [ - ":functional_ops_op_lib", - ], srcs_version = "PY2AND3", visibility = [ "//visibility:public", ], deps = [ - ":gen_functional_ops", + "//tensorflow/python:tpu_ops_gen", ], ) @@ -229,26 +117,6 @@ py_library( ], ) -tf_custom_op_py_library( - name = "tpu_py", - srcs = ["python/ops/tpu_ops.py"], - dso = [":python/ops/_tpu_ops.so"], - kernels = [ - ":all_ops", - ], - srcs_version = "PY2AND3", - deps = [ - ":profiler", - ":tpu_ops", - "//tensorflow/contrib/compiler:xla", - "//tensorflow/contrib/util:util_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:platform", - "//tensorflow/python:util", - ], -) - py_library( name = "tpu", srcs = [ @@ -327,7 +195,6 @@ py_library( ":datasets", ":functional", ":profiler", - ":tpu_ordinal_selector_py", ":tpu_py", "//tensorflow/compiler/xla/experimental/xla_sharding", "//tensorflow/compiler/xla/python_api:xla_shape", @@ -347,6 +214,7 @@ py_library( "//tensorflow/python:framework", "//tensorflow/python:framework_ops", "//tensorflow/python:tensor_shape", + "//tensorflow/python:tpu_ops_gen", "//tensorflow/python:training", "//tensorflow/python:util", "//tensorflow/python:variable_scope", @@ -470,13 +338,13 @@ py_library( srcs_version = "PY2AND3", deps = [ ":tpu_lib", - ":tpu_ops", "//tensorflow/core/protobuf/tpu:tpu_embedding_configuration_proto_py", "//tensorflow/python:array_ops", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:init_ops", "//tensorflow/python:math_ops", "//tensorflow/python:partitioned_variables", + "//tensorflow/python:tpu_ops_gen", "//tensorflow/python:variable_scope", "//tensorflow/python:variables", "@six_archive//:six", diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py 
index 55f7c6bcbc..2320306ba9 100644 --- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py +++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py @@ -28,16 +28,10 @@ from tensorflow.python.platform import tf_logging as logging if platform.system() != "Windows": # pylint: disable=wildcard-import,unused-import,g-import-not-at-top - from tensorflow.contrib.tpu.ops import gen_tpu_ops - from tensorflow.contrib.tpu.ops.gen_tpu_ops import * - - from tensorflow.contrib.util import loader - from tensorflow.python.platform import resource_loader + from tensorflow.python.ops import gen_tpu_ops + from tensorflow.python.ops.gen_tpu_ops import * # pylint: enable=wildcard-import,unused-import,g-import-not-at-top - _tpu_ops = loader.load_op_library( - resource_loader.get_path_to_datafile("_tpu_ops.so")) - def _create_default_group_assignment(): num_shards = tpu_function.get_tpu_context().number_of_shards if num_shards is None: @@ -237,12 +231,12 @@ if platform.system() != "Windows": """ if learning_rates is None: learning_rates = [] - return gen_tpu_ops._send_tpu_embedding_gradients( + return gen_tpu_ops.send_tpu_embedding_gradients( inputs=inputs, learning_rates=learning_rates, config=config, name=name) send_tpu_embedding_gradients.__doc__ = ( - gen_tpu_ops._send_tpu_embedding_gradients.__doc__) + gen_tpu_ops.send_tpu_embedding_gradients.__doc__) # pylint: disable=protected-access def enqueue_tpu_embedding_integer_batch(batch, @@ -268,14 +262,14 @@ if platform.system() != "Windows": """ if mode_override is None: mode_override = "unspecified" - return gen_tpu_ops._enqueue_tpu_embedding_integer_batch( + return gen_tpu_ops.enqueue_tpu_embedding_integer_batch( batch=batch, device_ordinal=device_ordinal, mode_override=mode_override, name=name) enqueue_tpu_embedding_integer_batch.__doc__ = ( - gen_tpu_ops._enqueue_tpu_embedding_integer_batch.__doc__) + gen_tpu_ops.enqueue_tpu_embedding_integer_batch.__doc__) # pylint: disable=protected-access def 
enqueue_tpu_embedding_sparse_batch(sample_indices, @@ -317,7 +311,7 @@ if platform.system() != "Windows": """ if mode_override is None: mode_override = "unspecified" - return gen_tpu_ops._enqueue_tpu_embedding_sparse_batch( + return gen_tpu_ops.enqueue_tpu_embedding_sparse_batch( sample_indices=sample_indices, embedding_indices=embedding_indices, aggregation_weights=aggregation_weights, @@ -327,7 +321,7 @@ if platform.system() != "Windows": name=name) enqueue_tpu_embedding_sparse_batch.__doc__ = ( - gen_tpu_ops._enqueue_tpu_embedding_sparse_batch.__doc__) + gen_tpu_ops.enqueue_tpu_embedding_sparse_batch.__doc__) # pylint: disable=protected-access def enqueue_tpu_embedding_sparse_tensor_batch(sample_indices, @@ -375,7 +369,7 @@ if platform.system() != "Windows": """ if mode_override is None: mode_override = "unspecified" - return gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch( + return gen_tpu_ops.enqueue_tpu_embedding_sparse_tensor_batch( sample_indices=sample_indices, embedding_indices=embedding_indices, aggregation_weights=aggregation_weights, @@ -386,7 +380,7 @@ if platform.system() != "Windows": name=name) enqueue_tpu_embedding_sparse_tensor_batch.__doc__ = ( - gen_tpu_ops._enqueue_tpu_embedding_sparse_tensor_batch.__doc__) + gen_tpu_ops.enqueue_tpu_embedding_sparse_tensor_batch.__doc__) else: # We have already built the appropriate libraries into the binary via CMake diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ordinal_selector_op.py b/tensorflow/contrib/tpu/python/ops/tpu_ordinal_selector_op.py index 5ca38cd1ba..6917ac2e1a 100644 --- a/tensorflow/contrib/tpu/python/ops/tpu_ordinal_selector_op.py +++ b/tensorflow/contrib/tpu/python/ops/tpu_ordinal_selector_op.py @@ -23,15 +23,12 @@ import platform if platform.system() != "Windows": # pylint: disable=wildcard-import,unused-import,g-import-not-at-top - from tensorflow.contrib.tpu.ops.gen_tpu_ordinal_selector_op import * + from tensorflow.python.ops.gen_tpu_ops import tpu_ordinal_selector from 
tensorflow.contrib.util import loader from tensorflow.python.platform import resource_loader # pylint: enable=wildcard-import,unused-import,g-import-not-at-top - _tpu_ordinal_selector_op = loader.load_op_library( - resource_loader.get_path_to_datafile("_tpu_ordinal_selector_op.so")) - else: # We have already built the appropriate libraries into the binary via CMake # if we have built contrib, so we don't need this diff --git a/tensorflow/contrib/tpu/python/tpu/functional.py b/tensorflow/contrib/tpu/python/tpu/functional.py index 24c85156e5..3d04c64033 100644 --- a/tensorflow/contrib/tpu/python/tpu/functional.py +++ b/tensorflow/contrib/tpu/python/tpu/functional.py @@ -18,22 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import platform +from tensorflow.contrib.tpu.python.ops import tpu_ops -from tensorflow.contrib.tpu.python.tpu import gen_functional_ops - - -TPUPartitionedCall = gen_functional_ops._tpu_partitioned_call # pylint: disable=invalid-name,protected-access - - -if platform.system() != "Windows": - # pylint: disable=wildcard-import,unused-import,g-import-not-at-top - from tensorflow.contrib.tpu.ops.gen_tpu_ordinal_selector_op import * - - from tensorflow.contrib.util import loader - from tensorflow.python.platform import resource_loader - # pylint: enable=wildcard-import,unused-import,g-import-not-at-top - - _tpu_partitioned_call_op = loader.load_op_library( - resource_loader.get_path_to_datafile("../ops/_functional_ops.so") - ) +TPUPartitionedCall = tpu_ops.tpu_partitioned_call # pylint: disable=invalid-name diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py index eb99a18d83..fcad7b2972 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py @@ -25,7 +25,6 @@ import re import six from tensorflow.contrib.framework.python.framework import experimental -from 
tensorflow.contrib.tpu.ops import gen_tpu_ops from tensorflow.contrib.tpu.python.ops import tpu_ops from tensorflow.contrib.tpu.python.tpu import tpu_system_metadata as tpu_system_metadata_lib from tensorflow.core.protobuf.tpu import optimization_parameters_pb2 @@ -35,6 +34,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_tpu_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import partitioned_variables diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 4f761e3599..988591499f 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -32,7 +32,6 @@ from six.moves import queue as Queue # pylint: disable=redefined-builtin from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.contrib.tpu.python.ops import tpu_ops -from tensorflow.contrib.tpu.python.ops import tpu_ordinal_selector_op from tensorflow.contrib.tpu.python.tpu import _tpu_estimator_embedding from tensorflow.contrib.tpu.python.tpu import error_handling from tensorflow.contrib.tpu.python.tpu import functional as tpu_functional @@ -1370,7 +1369,7 @@ def call_computation(computation, return tpu_functional.TPUPartitionedCall( args=tpu_subgraph.captured_inputs, - device_ordinal=tpu_ordinal_selector_op.tpu_ordinal_selector(), + device_ordinal=tpu_ops.tpu_ordinal_selector(), Tout=[o.type for o in tpu_subgraph.definition.signature.output_arg], f=tpu_subgraph) else: diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 11237b39e1..fb93e8ddd3 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1168,6 +1168,29 @@ tf_gen_op_libs( deps = [":lib"], ) +tf_gen_op_libs( + op_lib_names = [ 
+ "tpu_configuration_ops", + "tpu_cross_replica_ops", + "tpu_embedding_ops", + "tpu_functional_ops", + "tpu_heartbeat_ops", + "tpu_host_compute_ops", + "tpu_infeed_ops", + "tpu_outfeed_ops", + "tpu_ordinal_selector_ops", + "tpu_replication_ops", + ], + deps = [ + ":lib", + ":lib_proto_parsing", + ":protos_all_cc", + "//tensorflow/core/protobuf/tpu:tpu_embedding_configuration_proto_cc", + "//tensorflow/core/tpu:tpu_embedding_optimization_parameters_utils", + "//tensorflow/core/tpu:tpu_embedding_output_layout_utils", + ], +) + # And one for all user ops cc_library( name = "user_ops_op_lib", @@ -1284,6 +1307,16 @@ cc_library( ":state_ops_op_lib", ":stateless_random_ops_op_lib", ":string_ops_op_lib", + ":tpu_configuration_ops_op_lib", + ":tpu_cross_replica_ops_op_lib", + ":tpu_embedding_ops_op_lib", + ":tpu_functional_ops_op_lib", + ":tpu_heartbeat_ops_op_lib", + ":tpu_host_compute_ops_op_lib", + ":tpu_infeed_ops_op_lib", + ":tpu_outfeed_ops_op_lib", + ":tpu_ordinal_selector_ops_op_lib", + ":tpu_replication_ops_op_lib", ":training_ops_op_lib", ":user_ops_op_lib", ":word2vec_ops", @@ -1898,6 +1931,7 @@ filegroup( "**/*testutil*", "**/*testlib*", "**/*main.cc", + "**/tpu_*", ], ), visibility = ["//visibility:public"], diff --git a/tensorflow/core/api_def/base_api/api_def_AllToAll.pbtxt b/tensorflow/core/api_def/base_api/api_def_AllToAll.pbtxt new file mode 100644 index 0000000000..d6f28bd022 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_AllToAll.pbtxt @@ -0,0 +1,67 @@ +op { + graph_op_name: "AllToAll" + in_arg { + name: "input" + description: <= 0 and less than the number +of TPU cores in the task on which the node is placed. +END + } + summary: "An op that enqueues a list of input batch tensors to TPUEmbedding." 
+} diff --git a/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseBatch.pbtxt b/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseBatch.pbtxt new file mode 100644 index 0000000000..bb476ce3fc --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_EnqueueTPUEmbeddingSparseBatch.pbtxt @@ -0,0 +1,65 @@ +op { + graph_op_name: "EnqueueTPUEmbeddingSparseBatch" + in_arg { + name: "sample_indices" + description: <= 0 and less than the number +of TPU cores in the task on which the node is placed. +END + } + attr { + name: "combiners" + description: <= 0 and less than the number +of TPU cores in the task on which the node is placed. +END + } + attr { + name: "combiners" + description: <= 0 when the Op is running on the CPU +device. +END + } + summary: "An op which feeds a single Tensor value into the computation." +} diff --git a/tensorflow/core/api_def/base_api/api_def_InfeedEnqueueTuple.pbtxt b/tensorflow/core/api_def/base_api/api_def_InfeedEnqueueTuple.pbtxt new file mode 100644 index 0000000000..f87d6d76d8 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_InfeedEnqueueTuple.pbtxt @@ -0,0 +1,39 @@ +op { + graph_op_name: "InfeedEnqueueTuple" + in_arg { + name: "inputs" + description: <= 0 when the Op is running on the CPU +device. +END + } + summary: "Feeds multiple Tensor values into the computation as an XLA tuple." +} diff --git a/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParameters.pbtxt b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParameters.pbtxt new file mode 100644 index 0000000000..43901e1e44 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_LoadTPUEmbeddingADAMParameters.pbtxt @@ -0,0 +1,29 @@ +op { + graph_op_name: "LoadTPUEmbeddingADAMParameters" + in_arg { + name: "parameters" + description: <= 0 when the Op is running on the CPU +device. +END + } + summary: "Retrieves a single tensor from the computation outfeed." 
+ description: <= 0 when the Op is running on the CPU +device. +END + } + summary: "Retrieve multiple values from the computation outfeed." + description: <set_output(0, c->MakeShape(dims)); return Status::OK(); - }) - .Doc(R"doc( -An Op to exchange data across TPU replicas. On each replica, the input is -split into `split_count` blocks along `split_dimension` and send to the other -replicas given group_assignment. After receiving `split_count` - 1 blocks from -other replicas, we concatenate the blocks along `concat_dimension` as the -output. - -For example, suppose there are 2 TPU replicas: -replica 0 receives input: `[[A, B]]` -replica 1 receives input: `[[C, D]]` - -group_assignment=`[[0, 1]]` -concat_dimension=0 -split_dimension=1 -split_count=2 - -replica 0's output: `[[A], [C]]` -replica 1's output: `[[B], [D]]` - -input: The local input to the sum. -group_assignment: An int32 tensor with shape - [num_groups, num_replicas_per_group]. `group_assignment[i]` represents the - replica ids in the ith subgroup. -concat_dimension: The dimension number to concatenate. -split_dimension: The dimension number to split. -split_count: The number of splits, this number must equal to the sub-group - size(group_assignment.get_shape()[1]) -output: The exchanged result. -T: The type of elements to be exchanged. -)doc"); + }); REGISTER_OP("CrossReplicaSum") .Input("input: T") .Input("group_assignment: int32") .Output("output: T") .Attr("T: {bfloat16, float}") - .SetShapeFn(shape_inference::UnchangedShape) - .Doc(R"doc( -An Op to sum inputs across replicated TPU instances. Each instance supplies its -own input. - -For example, suppose there are 8 TPU instances: `[A, B, C, D, E, F, G, H]`. -Passing group_assignment=`[[0,2,4,6],[1,3,5,7]]` sets `A, C, E, G` as group 0, -and `B, D, F, H` as group 1. Thus we get the outputs: -`[A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H]`. - -input: The local input to the sum. 
-group_assignment: An int32 tensor with shape - [num_groups, num_replicas_per_group]. `group_assignment[i]` represents the - replica ids in the ith subgroup. -output: The sum of all the distributed inputs. -T: The type of elements to be summed. -)doc"); + .SetShapeFn(shape_inference::UnchangedShape); REGISTER_OP("CollectivePermute") .Input("input: T") .Input("source_target_pairs: int32") .Output("output: T") .Attr("T: numbertype") - .SetShapeFn(shape_inference::UnchangedShape) - .Doc(R"doc( -An Op to permute tensors across replicated TPU instances. Each instance -supplies its own input. - -For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing -source_target_pairs=`[[0,1],[1,2],[2,3],[3,0]]` gets the outputs: -`[D, A, B, C]`. - -input: The local input to be permuted. Currently only supports float and - bfloat16. -source_target_pairs: A tensor with shape [num_pairs, 2]. -output: The permuted input. -T: The type of elements to be exchanged. -)doc"); + .SetShapeFn(shape_inference::UnchangedShape); } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc b/tensorflow/core/ops/tpu_embedding_ops.cc similarity index 63% rename from tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc rename to tensorflow/core/ops/tpu_embedding_ops.cc index b991698359..79ebc09adc 100644 --- a/tensorflow/contrib/tpu/ops/tpu_embedding_ops.cc +++ b/tensorflow/core/ops/tpu_embedding_ops.cc @@ -13,8 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h" -#include "tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/op.h" @@ -23,6 +21,8 @@ limitations under the License. 
#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/protobuf/tpu/tpu_embedding_configuration.pb.h" +#include "tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h" +#include "tensorflow/core/tpu/tpu_embedding_output_layout_utils.h" namespace tensorflow { @@ -96,10 +96,6 @@ Status RegisterPerTableLoadOpsForAlgorithmBody( if (parameter.has_user_defined() || is_debug_op) { auto* arg = op_def->add_input_arg(); arg->set_name(parameter.name()); - arg->set_description( - strings::StrCat("Value of ", parameter.name(), " used in the ", - GetOptimizationAlgorithmFriendlyName(alg), - " optimization algorithm.")); arg->set_type(DT_FLOAT); } } @@ -127,7 +123,6 @@ Status RegisterPerTableLoadOpsForAlgorithmBody( shard_id_attr->set_name("shard_id"); shard_id_attr->set_type("int"); } - op_def->set_summary("Load embedding parameters for a single table."); string parameter_descriptions; for (const auto& parameter : state_variable_specs) { if (parameter.has_user_defined() || is_debug_op) { @@ -139,21 +134,6 @@ lookups using the %s optimization algorithm.)", GetOptimizationAlgorithmFriendlyName(alg).c_str()); } } - op_def->set_description(strings::Printf(R"doc( -An op that loads optimization parameters into HBM for embedding. Must be -preceded by a ConfigureTPUEmbeddingHost op that sets up the correct -embedding table configuration. For example, this op is used to install -parameters that are loaded from a checkpoint before a training loop is -executed. -%s -table_name: Name of this table; must match a name in the - TPUEmbeddingConfiguration proto (overrides table_id). -num_shards: Number of shards into which the embedding tables are divided. -shard_id: Identifier of shard for this operation. -table_id: Index of this table in the EmbeddingLayerConfiguration proto - (deprecated). 
-)doc", - parameter_descriptions.c_str())); op_def->set_is_commutative(false); op_def->set_is_aggregate(false); op_def->set_is_stateful(true); @@ -233,10 +213,6 @@ Status RegisterPerTableRetrieveOpsForAlgorithmBody( if (parameter.has_user_defined() || is_debug_op) { auto* arg = op_def->add_output_arg(); arg->set_name(parameter.name()); - arg->set_description( - strings::StrCat("Parameter ", parameter.name(), " updated by the ", - tpu::GetOptimizationAlgorithmFriendlyName(alg), - " optimization algorithm.")); arg->set_type(DT_FLOAT); } } @@ -264,7 +240,6 @@ Status RegisterPerTableRetrieveOpsForAlgorithmBody( shard_id_attr->set_name("shard_id"); shard_id_attr->set_type("int"); } - op_def->set_summary("Retrieve embedding parameters for a single table."); string parameter_descriptions; for (const auto& param : state_variable_specs) { if (param.has_user_defined() || is_debug_op) { @@ -276,20 +251,6 @@ parameters from embedding updates using the %s optimization algorithm.)", tpu::GetOptimizationAlgorithmFriendlyName(alg).c_str()); } } - op_def->set_description(strings::Printf(R"doc( -An op that retrieves optimization parameters from embedding to host -memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up -the correct embedding table configuration. For example, this op is -used to retrieve updated parameters before saving a checkpoint. -%s -table_name: Name of this table; must match a name in the - TPUEmbeddingConfiguration proto (overrides table_id). -num_shards: Number of shards into which the embedding tables are divided. -shard_id: Identifier of shard for this operation. -table_id: Index of this table in the EmbeddingLayerConfiguration proto - (deprecated). 
-)doc", - parameter_descriptions.c_str())); op_def->set_is_commutative(false); op_def->set_is_aggregate(false); op_def->set_is_stateful(true); @@ -388,23 +349,7 @@ REGISTER_OP("RecvTPUEmbeddingActivations") c->set_output(i, output_shape); } return Status::OK(); - }) - .Doc(R"doc( -An op that receives embedding activations on the TPU. - -The TPU system performs the embedding lookups and aggregations specified by -the arguments to TPUEmbeddingEnqueue(Integer/Sparse/SparseTensor)Batch. The -results of these aggregations are visible to the Tensorflow Graph as the -outputs of a RecvTPUEmbeddingActivations op. This op returns a list containing -one Tensor of activations per table specified in the model. There can be at -most one RecvTPUEmbeddingActivations op in the TPU graph. - -outputs: A TensorList of embedding activations containing one Tensor per - embedding table in the model. -num_outputs: The number of output activation tensors, equal to the number of - embedding tables in the model. -config: Serialized TPUEmbeddingConfiguration proto. -)doc"); + }); REGISTER_OP("TPUEmbeddingActivations") .Input("embedding_variable: float32") @@ -415,23 +360,7 @@ REGISTER_OP("TPUEmbeddingActivations") .SetShapeFn([](shape_inference::InferenceContext *c) { c->set_output(0, c->input(1)); return Status::OK(); - }) - .Doc(R"doc( -An op enabling differentiation of TPU Embeddings. - -This op simply returns its first input, which is assumed to have been sliced -from the Tensors returned by TPUEmbeddingDequeueActivations. The presence of this -op, and its first argument being a trainable Variable, enables automatic -differentiation of graphs containing embeddings via the TPU Embedding Python -libraries. - -embedding_variable: A trainable variable, enabling optimizers to find this op. -sliced_activations: The embedding activations Tensor to return. -table_id: The id of the table in the embedding layer configuration from which - these activations were computed. 
-lookup_id: Identifier of the set of embedding indices which produced these - activations. -)doc"); + }); REGISTER_OP("SendTPUEmbeddingGradients") .Input("inputs: N * float32") @@ -453,25 +382,7 @@ REGISTER_OP("SendTPUEmbeddingGradients") } return Status::OK(); - }) - .Doc(R"doc( -An op that performs gradient updates of embedding tables using the specified -learning rates. - -inputs: A TensorList of gradients with which to update embedding tables. - This argument has the same length and shapes as the return value of - RecvTPUEmbeddingActivations, but contains gradients of the model's loss - with respect to the embedding activations. The embedding tables are updated - from these gradients via the optimizer specified in the TPU embedding - configuration given to tpu.initialize_system. -learning_rates: A TensorList of float32 scalars, one for each dynamic learning - rate tag: see the comments in - //third_party/tensorflow/core/protobuf/tpu/optimization_parameters.proto. - Multiple tables can share the same dynamic learning rate tag as specified - in the configuration. If the learning rates for all tables are constant, - this list should be empty. -config: Serialized TPUEmbeddingConfiguration proto. -)doc"); + }); REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch") .Input("batch: N * int32") @@ -479,19 +390,7 @@ REGISTER_OP("EnqueueTPUEmbeddingIntegerBatch") .Attr("N: int >= 1") .Attr("device_ordinal: int = -1") .SetIsStateful() - .SetShapeFn(shape_inference::UnknownShape) - .Doc(R"doc( -An op that enqueues a list of input batch tensors to TPUEmbedding. - -batch: A list of 1D tensors, one for each embedding table, containing the - indices into the tables. -mode_override: A string input that overrides the mode specified in the - TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', - 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set - in TPUEmbeddingConfiguration is used, otherwise mode_override is used. 
-device_ordinal: The TPU device to use. Should be >= 0 and less than the number - of TPU cores in the task on which the node is placed. -)doc"); + .SetShapeFn(shape_inference::UnknownShape); REGISTER_OP("EnqueueTPUEmbeddingSparseBatch") .Input("sample_indices: N * int32") @@ -514,41 +413,7 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseBatch") } return Status::OK(); - }) - .Doc(R"doc( -An op that enqueues TPUEmbedding input indices from a SparseTensor. - -This Op eases the porting of code that uses embedding_lookup_sparse(), -although some Python preprocessing of the SparseTensor arguments to -embedding_lookup_sparse() is required to produce the arguments to this Op, -since only a single EnqueueTPUEmbeddingSparseBatch Op is allowed per training -step. - -The tensors at corresponding positions in the three input lists -must have the same shape, i.e. rank 1 with dim_size() equal to the total -number of lookups into the table described by the corresponding table_id. - -sample_indices: A list of rank 1 Tensors specifying the training example and - feature to which the corresponding embedding_indices and aggregation_weights - values belong. sample_indices[i] must equal b * nf + f, where nf is the - number of features from the corresponding table, f is in [0, nf), and - b is in [0, batch size). -embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. -aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per - (training example, feature) -- aggregation weights. -mode_override: A string input that overrides the mode specified in the - TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', - 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set - in TPUEmbeddingConfiguration is used, otherwise mode_override is used. -device_ordinal: The TPU device to use. Should be >= 0 and less than the number - of TPU cores in the task on which the node is placed. 
-combiners: A list of string scalars, one for each embedding table that specify - how to normalize the embedding activations after weighted summation. - Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have - the sum of the weights be 0 for 'mean' or the sum of the squared weights be - 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for - all tables. -)doc"); + }); REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch") .Input("sample_indices: N * int32") @@ -560,44 +425,6 @@ REGISTER_OP("EnqueueTPUEmbeddingSparseTensorBatch") .Attr("combiners: list(string) = []") .Attr("table_ids: list(int)") .SetIsStateful() - .SetShapeFn(shape_inference::UnknownShape) - .Doc(R"doc( -This Op eases the porting of code that uses tf.nn.embedding_lookup_sparse(). - -sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond -to the ith feature. table_ids[i] indicates which embedding table to look up ith -feature. - -The tensors at corresponding positions in the three input lists (sample_indices, -embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1 -with dim_size() equal to the total number of lookups into the table described by -the corresponding feature. - -sample_indices: A list of rank 1 Tensors specifying the training example to - which the corresponding embedding_indices and aggregation_weights values - belong. It corresponds to sp_ids.indices[:,0] in embedding_lookup_sparse(). -embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. - It corresponds to sp_ids.values in embedding_lookup_sparse(). -aggregation_weights: A list of rank 1 Tensors containing per training example - aggregation weights. It corresponds to sp_weights.values in - embedding_lookup_sparse(). -mode_override: A string input that overrides the mode specified in the - TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', - 'training', 'backward_pass_only'}. 
When set to 'unspecified', the mode set - in TPUEmbeddingConfiguration is used, otherwise mode_override is used. -device_ordinal: The TPU device to use. Should be >= 0 and less than the number - of TPU cores in the task on which the node is placed. -combiners: A list of string scalars, one for each embedding table that specify - how to normalize the embedding activations after weighted summation. - Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have - the sum of the weights be 0 for 'mean' or the sum of the squared weights be - 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for - all tables. -table_ids: A list of integers specifying the identifier of the embedding table - (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the - corresponding input. The ith input is looked up using table_ids[i]. The size - of the table_ids list must be equal to that of sample_indices, - embedding_indices and aggregation_weights. -)doc"); + .SetShapeFn(shape_inference::UnknownShape); } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/ops/functional_ops.cc b/tensorflow/core/ops/tpu_functional_ops.cc similarity index 100% rename from tensorflow/contrib/tpu/ops/functional_ops.cc rename to tensorflow/core/ops/tpu_functional_ops.cc diff --git a/tensorflow/contrib/tpu/ops/heartbeat_ops.cc b/tensorflow/core/ops/tpu_heartbeat_ops.cc similarity index 72% rename from tensorflow/contrib/tpu/ops/heartbeat_ops.cc rename to tensorflow/core/ops/tpu_heartbeat_ops.cc index ca0f5bc0e5..660aa32c82 100644 --- a/tensorflow/contrib/tpu/ops/heartbeat_ops.cc +++ b/tensorflow/core/ops/tpu_heartbeat_ops.cc @@ -23,15 +23,6 @@ REGISTER_OP("WorkerHeartbeat") .Input("request: string") .Output("response: string") .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Worker heartbeat op. 
- -Heartbeats may be sent periodically to indicate the coordinator is still active, -to retrieve the current worker status and to expedite shutdown when necessary. - -request: A string tensor containing a serialized WorkerHeartbeatRequest -response: A string tensor containing a serialized WorkerHeartbeatResponse -)doc"); + .SetShapeFn(shape_inference::ScalarShape); } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/ops/host_compute_ops.cc b/tensorflow/core/ops/tpu_host_compute_ops.cc similarity index 100% rename from tensorflow/contrib/tpu/ops/host_compute_ops.cc rename to tensorflow/core/ops/tpu_host_compute_ops.cc diff --git a/tensorflow/contrib/tpu/ops/infeed_ops.cc b/tensorflow/core/ops/tpu_infeed_ops.cc similarity index 51% rename from tensorflow/contrib/tpu/ops/infeed_ops.cc rename to tensorflow/core/ops/tpu_infeed_ops.cc index 2ed16c2a22..0090b761c4 100644 --- a/tensorflow/contrib/tpu/ops/infeed_ops.cc +++ b/tensorflow/core/ops/tpu_infeed_ops.cc @@ -27,14 +27,7 @@ REGISTER_OP("InfeedDequeue") .Attr("dtype: type") .Attr("shape: shape") .SetIsStateful() - .SetShapeFn(shape_inference::ExplicitShape) - .Doc(R"doc( -A placeholder op for a value that will be fed into the computation. - -output: A tensor that will be provided using the infeed mechanism. -dtype: The type of elements in the tensor. -shape: The shape of the tensor. -)doc"); + .SetShapeFn(shape_inference::ExplicitShape); REGISTER_OP("InfeedEnqueue") .Input("input: dtype") @@ -43,20 +36,7 @@ REGISTER_OP("InfeedEnqueue") .Attr("layout: list(int) = []") .Attr("device_ordinal: int = -1") .SetShapeFn(shape_inference::NoOutputs) - .SetIsStateful() - .Doc(R"doc( -An op which feeds a single Tensor value into the computation. - -input: A tensor that will be provided using the infeed mechanism. -dtype: The type of elements in the tensor. -shape: The shape of the tensor. -layout: A vector holding the requested layout in minor-to-major sequence. 
-If a layout attribute is passed, but its values are all -1, the layout will -be computed by the infeed operation. -device_ordinal: The TPU device to use. This should be -1 when the Op -is running on a TPU device, and >= 0 when the Op is running on the CPU -device. -)doc"); + .SetIsStateful(); REGISTER_OP("InfeedEnqueueTuple") .Input("inputs: dtypes") @@ -65,21 +45,7 @@ REGISTER_OP("InfeedEnqueueTuple") .Attr("layouts: list(int) = []") .Attr("device_ordinal: int = -1") .SetShapeFn(shape_inference::NoOutputs) - .SetIsStateful() - .Doc(R"doc( -An op which feeds multiple Tensor values into the computation as an XLA tuple. - -inputs: A list of tensors that will be provided using the infeed mechanism. -dtypes: The element types of each element in `inputs`. -shapes: The shapes of each tensor in `inputs`. -layouts: A vector holding the requested layout in minor-to-major sequence for -all the tuple shapes, in the order the shapes appear in the "shapes" input. -The layout elements for a sub-shape can be set to -1, in which case the -corresponding layout will be computed by the infeed operation. -device_ordinal: The TPU device to use. This should be -1 when the Op -is running on a TPU device, and >= 0 when the Op is running on the CPU -device. -)doc"); + .SetIsStateful(); REGISTER_OP("InfeedDequeueTuple") .Output("outputs: dtypes") @@ -95,14 +61,6 @@ REGISTER_OP("InfeedDequeueTuple") c->set_output(i, out); } return Status::OK(); - }) - .Doc(R"doc( -A placeholder op for multiple values that will be fed into the computation -simultaneously as an XLA tuple. - -outputs: A list of tensors that will be provided using the infeed mechanism. -dtypes: The element types of each element in `outputs`. -shapes: The shapes of each tensor in `outputs`. 
-)doc"); + }); } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/ops/tpu_ordinal_selector_op.cc b/tensorflow/core/ops/tpu_ordinal_selector_ops.cc similarity index 80% rename from tensorflow/contrib/tpu/ops/tpu_ordinal_selector_op.cc rename to tensorflow/core/ops/tpu_ordinal_selector_ops.cc index 54e6b20f7f..72f24c57db 100644 --- a/tensorflow/contrib/tpu/ops/tpu_ordinal_selector_op.cc +++ b/tensorflow/core/ops/tpu_ordinal_selector_ops.cc @@ -25,15 +25,6 @@ REGISTER_OP("TPUOrdinalSelector") c->set_output(0, c->Vector(shape_inference::InferenceContext::kUnknownDim)); return Status::OK(); - }) - .Doc(R"doc( -A TPU core selector Op. - -This Op produces a set of TPU cores (for warm-up) or a single TPU core -(for regular inference) to execute the TPU program on. The output is -consumed by TPUPartitionedCall. - -device_ordinals: A vector 1 or more TPU cores. -)doc"); + }); } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/ops/outfeed_ops.cc b/tensorflow/core/ops/tpu_outfeed_ops.cc similarity index 59% rename from tensorflow/contrib/tpu/ops/outfeed_ops.cc rename to tensorflow/core/ops/tpu_outfeed_ops.cc index b05c76ca64..e170ed05a0 100644 --- a/tensorflow/contrib/tpu/ops/outfeed_ops.cc +++ b/tensorflow/core/ops/tpu_outfeed_ops.cc @@ -26,24 +26,13 @@ REGISTER_OP("OutfeedEnqueue") .Input("input: dtype") .Attr("dtype: type") .SetIsStateful() - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -An op which emits a single Tensor value from an XLA computation. - -input: A tensor that will be inserted into the outfeed queue. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("OutfeedEnqueueTuple") .Input("inputs: dtypes") .Attr("dtypes: list(type)") .SetIsStateful() - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -An op which emits multiple Tensor values from an XLA computation. - -inputs: A list of tensors that will be inserted into the outfeed queue as an -XLA tuple. 
-)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("OutfeedDequeue") .Output("output: dtype") @@ -51,18 +40,7 @@ REGISTER_OP("OutfeedDequeue") .Attr("shape: shape") .Attr("device_ordinal: int = -1") .SetIsStateful() - .SetShapeFn(shape_inference::ExplicitShape) - .Doc(R"doc( -Retrieves a single tensor from the computation outfeed. This operation will -block indefinitely until data is available. - -output: A tensor that will be read from the device outfeed. -dtype: The type of elements in the tensor. -shape: The shape of the tensor. -device_ordinal: The TPU device to use. This should be -1 when the Op -is running on a TPU device, and >= 0 when the Op is running on the CPU -device. -)doc"); + .SetShapeFn(shape_inference::ExplicitShape); REGISTER_OP("OutfeedDequeueTuple") .Output("outputs: dtypes") @@ -85,18 +63,6 @@ REGISTER_OP("OutfeedDequeueTuple") c->set_output(i, out); } return Status::OK(); - }) - .Doc(R"doc( -Retrieve multiple values that will be emitted by the computation as an XLA -tuple. This operations will block indefinitely until data is available. -Output `i` corresponds to XLA tuple element `i`. - -outputs: A list of tensors that will be read from the outfeed. -dtypes: The element types of each element in `outputs`. -shapes: The shapes of each tensor in `outputs`. -device_ordinal: The TPU device to use. This should be -1 when the Op -is running on a TPU device, and >= 0 when the Op is running on the CPU -device. 
-)doc"); + }); } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/ops/replication_ops.cc b/tensorflow/core/ops/tpu_replication_ops.cc similarity index 69% rename from tensorflow/contrib/tpu/ops/replication_ops.cc rename to tensorflow/core/ops/tpu_replication_ops.cc index d4180d1a20..254399c9a2 100644 --- a/tensorflow/contrib/tpu/ops/replication_ops.cc +++ b/tensorflow/core/ops/tpu_replication_ops.cc @@ -69,10 +69,7 @@ REGISTER_OP("TPUReplicatedInput") } } return Status::OK(); - }) - .Doc( - "Operator that connects N unreplicated inputs to an N-way " - "replicated TPU computation."); + }); REGISTER_OP("TPUReplicatedOutput") .Input("input: T") @@ -84,10 +81,7 @@ REGISTER_OP("TPUReplicatedOutput") c->set_output(i, c->input(0)); } return Status::OK(); - }) - .Doc( - "Operator that connects the output of an N-way replicated TPU " - "computation to N separate outputs."); + }); REGISTER_OP("TPUCompilationResult") .Output("output: string") @@ -112,35 +106,6 @@ REGISTER_OP("TPUReplicate") .Input("variables: NumVariables * resource") .Input("guaranteed_constants: Tguaranteed_constants") .Output("outputs: output_types") - .SetShapeFn(shape_inference::UnknownShape) - .Doc(R"doc( -Runs replicated computations on a distributed TPU system. - -computation: a function containing the computation to run. -num_replicas: the number of replicas of the computation to run. -num_cores_per_replica: the number of logical cores in each replica. -topology: A serialized tensorflow.tpu.TopologyProto that describes the TPU -topology. -use_tpu: a bool indicating if this computation will run on TPU or CPU/GPU. -Currently, only supports a default placement (computation is placed on GPU -if one is available, and on CPU if not). -device_assignment: a flattened array with shape - [replica, num_cores_per_replica, mesh_dimension] that maps the coordinates - of logical cores in each replica of a computation to physical coordinates in - the TPU topology. 
-Tinputs: the types of the arguments to 'computation'. -inputs: the inputs to 'computation', flattened, in replica-major order. -Tbroadcast_inputs: the types of the additional arguments to broadcast to all - replicas. -Tguaranteed_constants: the types of the arguments to 'guaranteed_constants'. -broadcast_inputs: additional arguments to broadcast to all replicas. The - broadcast inputs are appended to the per-replica inputs when calling - computation. -guaranteed_constants: arguments which have been guaranteed to not -change their values during the session lifetime. These contain tensors marked as -constant using the GuaranteeConstOp. -output_types: the types of the outputs of 'computation'. -outputs: the outputs of 'computation'. -)doc"); + .SetShapeFn(shape_inference::UnknownShape); } // namespace tensorflow diff --git a/tensorflow/contrib/tpu/utils/BUILD b/tensorflow/core/tpu/BUILD similarity index 100% rename from tensorflow/contrib/tpu/utils/BUILD rename to tensorflow/core/tpu/BUILD diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.cc b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc similarity index 99% rename from tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.cc rename to tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc index d1df7e78ab..2c179b5f83 100644 --- a/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.cc +++ b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h" +#include "tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h" #include "tensorflow/core/lib/core/errors.h" namespace tensorflow { diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h similarity index 93% rename from tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h rename to tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h index 7a7833bf2d..ceb07ff355 100644 --- a/tensorflow/contrib/tpu/utils/tpu_embedding_optimization_parameters_utils.h +++ b/tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_ -#define TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_ +#ifndef TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_ +#define TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_ #include #include "absl/base/casts.h" @@ -87,4 +87,4 @@ const float kGradientAccumulatorInitialValue = absl::bit_cast(1); } // namespace tpu } // namespace tensorflow -#endif // TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_ +#endif // TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OPTIMIZATION_PARAMETERS_UTILS_H_ diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.cc b/tensorflow/core/tpu/tpu_embedding_output_layout_utils.cc similarity index 97% rename from tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.cc rename to tensorflow/core/tpu/tpu_embedding_output_layout_utils.cc 
index e65abe3894..3a027757af 100644 --- a/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.cc +++ b/tensorflow/core/tpu/tpu_embedding_output_layout_utils.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h" +#include "tensorflow/core/tpu/tpu_embedding_output_layout_utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/protobuf/tpu/tpu_embedding_output_layout.pb.h" diff --git a/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h b/tensorflow/core/tpu/tpu_embedding_output_layout_utils.h similarity index 85% rename from tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h rename to tensorflow/core/tpu/tpu_embedding_output_layout_utils.h index 1a04c7bdb4..5bff401b9d 100644 --- a/tensorflow/contrib/tpu/utils/tpu_embedding_output_layout_utils.h +++ b/tensorflow/core/tpu/tpu_embedding_output_layout_utils.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_ -#define TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_ +#ifndef TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_ +#define TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_ #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/lib/core/status.h" @@ -35,4 +35,4 @@ Status ComputeOutputTensorShapes( } // namespace tpu } // namespace tensorflow -#endif // TENSORFLOW_CONTRIB_TPU_UTILS_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_ +#endif // TENSORFLOW_CORE_TPU_TPU_EMBEDDING_OUTPUT_LAYOUT_UTILS_H_ diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 44e9540fbe..bd31614a92 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1988,6 +1988,26 @@ tf_gen_op_wrapper_private_py( ], ) +tf_gen_op_wrapper_private_py( + name = "tpu_ops_gen", + visibility = [ + "//smartass/brain/configure/python:__pkg__", + "//tensorflow/contrib/tpu:__pkg__", + ], + deps = [ + "//tensorflow/core:tpu_configuration_ops_op_lib", + "//tensorflow/core:tpu_cross_replica_ops_op_lib", + "//tensorflow/core:tpu_embedding_ops_op_lib", + "//tensorflow/core:tpu_functional_ops_op_lib", + "//tensorflow/core:tpu_heartbeat_ops_op_lib", + "//tensorflow/core:tpu_host_compute_ops_op_lib", + "//tensorflow/core:tpu_infeed_ops_op_lib", + "//tensorflow/core:tpu_ordinal_selector_ops_op_lib", + "//tensorflow/core:tpu_outfeed_ops_op_lib", + "//tensorflow/core:tpu_replication_ops_op_lib", + ], +) + py_library( name = "array_grad", srcs = ["ops/array_grad.py"], -- GitLab From 880a87883d754c3544e0f6470fcd2b6feace21b2 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Wed, 13 Feb 2019 17:11:08 -0800 Subject: [PATCH 096/351] Remove the experimental naming prefix for implementation selector in grappler. 
PiperOrigin-RevId: 233861247 --- tensorflow/core/framework/function.cc | 10 ++-- tensorflow/core/framework/function_test.cc | 2 +- tensorflow/core/grappler/optimizers/BUILD | 14 ++--- .../grappler/optimizers/function_api_info.cc | 4 +- .../optimizers/function_api_info_test.cc | 4 +- ...selector.cc => implementation_selector.cc} | 10 ++-- ...n_selector.h => implementation_selector.h} | 27 +++++----- ...est.cc => implementation_selector_test.cc} | 53 +++++++++---------- .../grappler/optimizers/meta_optimizer.cc | 8 +-- tensorflow/python/eager/function.py | 14 ----- tensorflow/python/eager/function_test.py | 23 +++----- tensorflow/python/keras/layers/recurrent.py | 16 +++--- .../python/keras/layers/unified_gru_test.py | 2 +- .../python/keras/layers/unified_lstm_test.py | 2 +- 14 files changed, 81 insertions(+), 108 deletions(-) rename tensorflow/core/grappler/optimizers/{experimental_implementation_selector.cc => implementation_selector.cc} (95%) rename tensorflow/core/grappler/optimizers/{experimental_implementation_selector.h => implementation_selector.h} (80%) rename tensorflow/core/grappler/optimizers/{experimental_implementation_selector_test.cc => implementation_selector_test.cc} (82%) diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc index b7adfd0c94..20a5015970 100644 --- a/tensorflow/core/framework/function.cc +++ b/tensorflow/core/framework/function.cc @@ -1339,7 +1339,7 @@ GET_ATTR(bool) namespace { -constexpr char kExperimentalApiImplements[] = "experimental_api_implements"; +constexpr char kApiImplements[] = "api_implements"; absl::flat_hash_set ReachableFunctions( const FunctionLibraryDefinition& flib, @@ -1347,10 +1347,10 @@ absl::flat_hash_set ReachableFunctions( // Functions that are reachable from the graph. 
absl::flat_hash_set reachable_funcs; - // For any functions, if it has attribute "experimental_api_implements" = + // For any functions, if it has attribute "api_implements" = // "some_interface" and it is reachable, then it means any other // function with same attribute name and value could also be potentially - // reachable, eg via experimental_implementation_selector swapping the + // reachable, eg via implementation_selector swapping the // nodedef. absl::flat_hash_set reachable_api_interface; @@ -1400,7 +1400,7 @@ absl::flat_hash_set ReachableFunctions( const string& func_name = func->signature().name(); reachable_funcs.insert(func_name); - const auto attr_it = func->attr().find(kExperimentalApiImplements); + const auto attr_it = func->attr().find(kApiImplements); if (attr_it != func->attr().end()) { reachable_api_interface.insert(attr_it->second.s()); } @@ -1416,7 +1416,7 @@ absl::flat_hash_set ReachableFunctions( for (const auto& func_name : flib.ListFunctionNames()) { const auto& func_def = flib.Find(func_name); - const auto attr_it = func_def->attr().find(kExperimentalApiImplements); + const auto attr_it = func_def->attr().find(kApiImplements); if (attr_it != func_def->attr().end()) { if (reachable_api_interface.contains(attr_it->second.s())) { reachable_funcs.insert(func_name); diff --git a/tensorflow/core/framework/function_test.cc b/tensorflow/core/framework/function_test.cc index 6a828e9afa..779e9f7f41 100644 --- a/tensorflow/core/framework/function_test.cc +++ b/tensorflow/core/framework/function_test.cc @@ -1320,7 +1320,7 @@ TEST(FunctionLibraryDefinitionTest, ReachableDefinitions) { if (!interface_name.empty()) { auto* attr = func_def.mutable_attr(); - (*attr)["experimental_api_implements"].set_s(interface_name); + (*attr)["api_implements"].set_s(interface_name); } return func_def; }; diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 0b5a63ab92..9bb63a5f4e 100644 --- 
a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -521,9 +521,9 @@ cc_library( ":custom_graph_optimizer_registry", ":debug_stripper", ":dependency_optimizer", - ":experimental_implementation_selector", ":function_optimizer", ":graph_optimizer", + ":implementation_selector", ":layout_optimizer", ":loop_optimizer", ":memory_optimizer", @@ -869,9 +869,9 @@ tf_cc_test( ) cc_library( - name = "experimental_implementation_selector", - srcs = ["experimental_implementation_selector.cc"], - hdrs = ["experimental_implementation_selector.h"], + name = "implementation_selector", + srcs = ["implementation_selector.cc"], + hdrs = ["implementation_selector.h"], deps = [ ":custom_graph_optimizer", ":custom_graph_optimizer_registry", @@ -887,14 +887,14 @@ cc_library( ) tf_cc_test( - name = "experimental_implementation_selector_test", + name = "implementation_selector_test", size = "small", - srcs = ["experimental_implementation_selector_test.cc"], + srcs = ["implementation_selector_test.cc"], deps = [ ":custom_graph_optimizer", ":custom_graph_optimizer_registry", - ":experimental_implementation_selector", ":function_api_info", + ":implementation_selector", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", diff --git a/tensorflow/core/grappler/optimizers/function_api_info.cc b/tensorflow/core/grappler/optimizers/function_api_info.cc index 497ad6032e..9f6352f1f2 100644 --- a/tensorflow/core/grappler/optimizers/function_api_info.cc +++ b/tensorflow/core/grappler/optimizers/function_api_info.cc @@ -29,10 +29,10 @@ FunctionApiInfo::~FunctionApiInfo() {} Status FunctionApiInfo::Init(const FunctionDef& function_def) { function_type_ = FunctionApiInfo::FunctionType::INFERENCE; for (const auto& attr : function_def.attr()) { - if (attr.first == "experimental_api_preferred_device") { + if (attr.first == "api_preferred_device") { preferred_device_ = attr.second.s(); } - if (attr.first == 
"experimental_api_implements") { + if (attr.first == "api_implements") { interface_name_ = attr.second.s(); } if (attr.first == "forward_function_name") { diff --git a/tensorflow/core/grappler/optimizers/function_api_info_test.cc b/tensorflow/core/grappler/optimizers/function_api_info_test.cc index b683d26b32..9bb517faa3 100644 --- a/tensorflow/core/grappler/optimizers/function_api_info_test.cc +++ b/tensorflow/core/grappler/optimizers/function_api_info_test.cc @@ -58,9 +58,9 @@ void PopulateFunction(const string& name, const string& api_interface_name, auto* func_attr = func_def->mutable_attr(); if (!api_interface_name.empty()) - (*func_attr)["experimental_api_implements"].set_s(api_interface_name); + (*func_attr)["api_implements"].set_s(api_interface_name); if (!preferred_device.empty()) - (*func_attr)["experimental_api_preferred_device"].set_s(preferred_device); + (*func_attr)["api_preferred_device"].set_s(preferred_device); if (!forward_function_name.empty()) (*func_attr)["forward_function_name"].set_s(forward_function_name); if (!backward_function_name.empty()) diff --git a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc b/tensorflow/core/grappler/optimizers/implementation_selector.cc similarity index 95% rename from tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc rename to tensorflow/core/grappler/optimizers/implementation_selector.cc index 75ad8bffef..f318569bd1 100644 --- a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.cc +++ b/tensorflow/core/grappler/optimizers/implementation_selector.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/grappler/optimizers/experimental_implementation_selector.h" +#include "tensorflow/core/grappler/optimizers/implementation_selector.h" #include @@ -101,14 +101,14 @@ Status UpdateNodeDef(NodeDef* node_def, const string& funcName, return Status::OK(); } -Status ExperimentalImplementationSelector::LoadFunctions( +Status ImplementationSelector::LoadFunctions( const GraphDef& graph) { lib_info_.reset(new FunctionLibraryApiInfo); TF_RETURN_IF_ERROR(lib_info_->Init(graph.library())); return Status::OK(); } -Status ExperimentalImplementationSelector::MaybeOptimizeFunctionCall( +Status ImplementationSelector::MaybeOptimizeFunctionCall( NodeDef* node_def) const { // There are two ways of calling functions: // 1. By specifying an op name as a function name, or @@ -170,7 +170,7 @@ Status ExperimentalImplementationSelector::MaybeOptimizeFunctionCall( return Status::OK(); } -Status ExperimentalImplementationSelector::SelectImplementation( +Status ImplementationSelector::SelectImplementation( GraphDef* graph) const { if (!graph->has_library()) { VLOG(2) << "Skipping graph since it does not have function def"; @@ -183,7 +183,7 @@ Status ExperimentalImplementationSelector::SelectImplementation( return Status::OK(); } -Status ExperimentalImplementationSelector::Optimize(Cluster* cluster, +Status ImplementationSelector::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { *optimized_graph = item.graph; diff --git a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.h b/tensorflow/core/grappler/optimizers/implementation_selector.h similarity index 80% rename from tensorflow/core/grappler/optimizers/experimental_implementation_selector.h rename to tensorflow/core/grappler/optimizers/implementation_selector.h index 82f7473a14..c206d21640 100644 --- a/tensorflow/core/grappler/optimizers/experimental_implementation_selector.h +++ 
b/tensorflow/core/grappler/optimizers/implementation_selector.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_EXPERIMENTAL_IMPLEMENTATION_SELECTOR_H_ -#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_EXPERIMENTAL_IMPLEMENTATION_SELECTOR_H_ +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_IMPLEMENTATION_SELECTOR_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_IMPLEMENTATION_SELECTOR_H_ #include @@ -33,7 +33,6 @@ limitations under the License. namespace tensorflow { namespace grappler { -// -- EXPERIMENTAL -- // This transformation replaces function calls by the appropriate function // definition based on properties of the runtime system. For instance, // we may choose one implementation over another if we have a GPU with @@ -45,12 +44,12 @@ namespace grappler { // // For instance, the python code might specify: // @Defun(tf.float32, -// experimental_api_implements='plus_one', -// experimental_api_preferred_device='GPU') +// api_implements='plus_one', +// api_preferred_device='GPU') // def plus_one_gpu(x): return x + 1.0 // // @Defun(tf.float32, -// experimental_api_implements='plus_one') +// api_implements='plus_one') // def plus_one_reference_implementation(x): return x + 1.0 // input = tf.constant(2.0, dtype=tf.float32) // @@ -62,21 +61,21 @@ namespace grappler { // `plus_one_reference_implementation` based on the availability of the GPU. // // Available annotations: -// - experimental_api_implements(string): all functions mapping to the same +// - api_implements(string): all functions mapping to the same // string can be interchanged. For now, all functions must have the same // signature and overloads are not allowed. Defuns within defuns are // allowed. -// - experimental_api_preferred_device(string): sets which device is preferred. 
-class ExperimentalImplementationSelector : public CustomGraphOptimizer { +// - api_preferred_device(string): sets which device is preferred. +class ImplementationSelector : public CustomGraphOptimizer { public: - ExperimentalImplementationSelector() = default; - ~ExperimentalImplementationSelector() override = default; + ImplementationSelector() = default; + ~ImplementationSelector() override = default; Status Init( const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { return Status::OK(); } string name() const override { - return "experimental_implementation_selector"; + return "implementation_selector"; } // This call is not thread-safe. @@ -106,10 +105,10 @@ class ExperimentalImplementationSelector : public CustomGraphOptimizer { std::unique_ptr lib_info_; - TF_DISALLOW_COPY_AND_ASSIGN(ExperimentalImplementationSelector); + TF_DISALLOW_COPY_AND_ASSIGN(ImplementationSelector); }; } // namespace grappler } // namespace tensorflow -#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_EXPERIMENTAL_IMPLEMENTATION_SELECTOR_H_ +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_IMPLEMENTATION_SELECTOR_H_ diff --git a/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc b/tensorflow/core/grappler/optimizers/implementation_selector_test.cc similarity index 82% rename from tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc rename to tensorflow/core/grappler/optimizers/implementation_selector_test.cc index e330835e9b..e2f58964a2 100644 --- a/tensorflow/core/grappler/optimizers/experimental_implementation_selector_test.cc +++ b/tensorflow/core/grappler/optimizers/implementation_selector_test.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/grappler/optimizers/experimental_implementation_selector.h" +#include "tensorflow/core/grappler/optimizers/implementation_selector.h" #include #include @@ -38,15 +38,14 @@ namespace { constexpr char CpuDevice[] = "/device:CPU:0"; constexpr char GpuDevice[] = "/device:GPU:0"; -class ExperimentalImplementationSelectorTest : public GrapplerTest {}; +class ImplementationSelectorTest : public GrapplerTest {}; -TEST_F(ExperimentalImplementationSelectorTest, NoUpdate) { +TEST_F(ImplementationSelectorTest, NoUpdate) { TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {CpuDevice}); GrapplerItem item; CHECK(fake_input.NextItem(&item)); - std::unique_ptr optimizer( - new ExperimentalImplementationSelector); + std::unique_ptr optimizer(new ImplementationSelector); ASSERT_NE(nullptr, optimizer); TF_ASSERT_OK(optimizer->Init()); @@ -58,19 +57,19 @@ TEST_F(ExperimentalImplementationSelectorTest, NoUpdate) { EXPECT_EQ(item.graph.node_size(), output.node_size()); } -TEST_F(ExperimentalImplementationSelectorTest, SwapImplementation) { +TEST_F(ImplementationSelectorTest, SwapImplementation) { using test::function::NDef; auto cpu_def = test::function::XTimesTwo(); auto* func_attr = cpu_def.mutable_attr(); - (*func_attr)["experimental_api_implements"].set_s("times_two"); - (*func_attr)["experimental_api_preferred_device"].set_s("CPU"); + (*func_attr)["api_implements"].set_s("times_two"); + (*func_attr)["api_preferred_device"].set_s("CPU"); auto gpu_def = test::function::XAddX(); auto* func2_attr = gpu_def.mutable_attr(); - (*func2_attr)["experimental_api_implements"].set_s("times_two"); - (*func2_attr)["experimental_api_preferred_device"].set_s("GPU"); + (*func2_attr)["api_implements"].set_s("times_two"); + (*func2_attr)["api_preferred_device"].set_s("GPU"); - ExperimentalImplementationSelector optimizer; + ImplementationSelector optimizer; GraphDef output; GrapplerItem 
item; item.graph = test::function::GDef( @@ -96,19 +95,19 @@ TEST_F(ExperimentalImplementationSelectorTest, SwapImplementation) { } } -TEST_F(ExperimentalImplementationSelectorTest, SwapImplementationEval) { +TEST_F(ImplementationSelectorTest, SwapImplementationEval) { using test::function::NDef; auto cpu_def = test::function::XTimesTwo(); auto* func_attr = cpu_def.mutable_attr(); - (*func_attr)["experimental_api_implements"].set_s("random_boost"); - (*func_attr)["experimental_api_preferred_device"].set_s("CPU"); + (*func_attr)["api_implements"].set_s("random_boost"); + (*func_attr)["api_preferred_device"].set_s("CPU"); auto gpu_def = test::function::XTimesFour(); auto* func2_attr = gpu_def.mutable_attr(); - (*func2_attr)["experimental_api_implements"].set_s("random_boost"); - (*func2_attr)["experimental_api_preferred_device"].set_s("GPU"); + (*func2_attr)["api_implements"].set_s("random_boost"); + (*func2_attr)["api_preferred_device"].set_s("GPU"); - ExperimentalImplementationSelector optimizer; + ImplementationSelector optimizer; GraphDef output; GrapplerItem item; item.graph = test::function::GDef( @@ -133,7 +132,7 @@ TEST_F(ExperimentalImplementationSelectorTest, SwapImplementationEval) { test::AsScalar(2.0f)); } -TEST_F(ExperimentalImplementationSelectorTest, SwapImplementationWithGradient) { +TEST_F(ImplementationSelectorTest, SwapImplementationWithGradient) { using test::function::NDef; using FDH = FunctionDefHelper; // boost_1 returns the doubled input and a const as the internal state, the @@ -146,8 +145,8 @@ TEST_F(ExperimentalImplementationSelectorTest, SwapImplementationWithGradient) { /* Mapping between function returns and function node outputs. 
*/ {{"z", "boost:z:0"}, {"s", "one:output:0"}}); auto* boost_1_attr = boost_1.mutable_attr(); - (*boost_1_attr)["experimental_api_implements"].set_s("random_boost"); - (*boost_1_attr)["experimental_api_preferred_device"].set_s("CPU"); + (*boost_1_attr)["api_implements"].set_s("random_boost"); + (*boost_1_attr)["api_preferred_device"].set_s("CPU"); (*boost_1_attr)["backward_function_name"].set_s("BoostCpuGradient"); FunctionDef boost_1_gradient = FDH::Create( @@ -157,8 +156,8 @@ TEST_F(ExperimentalImplementationSelectorTest, SwapImplementationWithGradient) { /* Mapping between function returns and function node outputs. */ {{"dx", "grad:z:0"}}); auto* boost_1_grad_attr = boost_1_gradient.mutable_attr(); - (*boost_1_grad_attr)["experimental_api_implements"].set_s("random_boost"); - (*boost_1_grad_attr)["experimental_api_preferred_device"].set_s("CPU"); + (*boost_1_grad_attr)["api_implements"].set_s("random_boost"); + (*boost_1_grad_attr)["api_preferred_device"].set_s("CPU"); (*boost_1_grad_attr)["forward_function_name"].set_s("BoostCpu"); // boost_2 return the input * 4, and with two extra internal states. @@ -171,8 +170,8 @@ TEST_F(ExperimentalImplementationSelectorTest, SwapImplementationWithGradient) { /* Mapping between function returns and function node outputs. */ {{"z", "boost:z:0"}, {"s1", "one:output:0"}, {"s2", "two:output:0"}}); auto* boost_2_attr = boost_2_func.mutable_attr(); - (*boost_2_attr)["experimental_api_implements"].set_s("random_boost"); - (*boost_2_attr)["experimental_api_preferred_device"].set_s("GPU"); + (*boost_2_attr)["api_implements"].set_s("random_boost"); + (*boost_2_attr)["api_preferred_device"].set_s("GPU"); (*boost_2_attr)["backward_function_name"].set_s("BoostGpuGradient"); FunctionDef boost_2_gradient = FDH::Create( @@ -182,8 +181,8 @@ TEST_F(ExperimentalImplementationSelectorTest, SwapImplementationWithGradient) { /* Mapping between function returns and function node outputs. 
*/ {{"dx", "grad:z:0"}}); auto* boost_2_grad_attr = boost_2_gradient.mutable_attr(); - (*boost_2_grad_attr)["experimental_api_implements"].set_s("random_boost"); - (*boost_2_grad_attr)["experimental_api_preferred_device"].set_s("GPU"); + (*boost_2_grad_attr)["api_implements"].set_s("random_boost"); + (*boost_2_grad_attr)["api_preferred_device"].set_s("GPU"); (*boost_2_grad_attr)["forward_function_name"].set_s("BoostGpu"); // Define the forward function with f = boost2 function but with CPU device. @@ -203,7 +202,7 @@ TEST_F(ExperimentalImplementationSelectorTest, SwapImplementationWithGradient) { {"f", FDH::FunctionRef("Boost2Gradient")}}, CpuDevice); - ExperimentalImplementationSelector optimizer; + ImplementationSelector optimizer; GraphDef output; GrapplerItem item; item.graph = test::function::GDef( diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 33942a591f..8b21e58073 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -26,8 +26,8 @@ limitations under the License. 
#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" #include "tensorflow/core/grappler/optimizers/debug_stripper.h" #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h" -#include "tensorflow/core/grappler/optimizers/experimental_implementation_selector.h" #include "tensorflow/core/grappler/optimizers/function_optimizer.h" +#include "tensorflow/core/grappler/optimizers/implementation_selector.h" #include "tensorflow/core/grappler/optimizers/layout_optimizer.h" #include "tensorflow/core/grappler/optimizers/loop_optimizer.h" #include "tensorflow/core/grappler/optimizers/memory_optimizer.h" @@ -241,14 +241,14 @@ Status MetaOptimizer::InitializeCustomGraphOptimizers( pre_initialized_optimizers.end()) { continue; } - // Initialize the ExperimentalImplementationSelector here instead of + // Initialize the ImplementationSelector here instead of // CustomizeOptimizer registry, due the static link issue in TensorRT for // double registry. // TODO(laigd): Remove this hack and change it back to use the registry once // the duplicate static import issue is fixed. 
std::unique_ptr custom_optimizer; - if (optimizer_config.name() == "ExperimentalImplementationSelector") { - custom_optimizer.reset(new ExperimentalImplementationSelector()); + if (optimizer_config.name() == "ImplementationSelector") { + custom_optimizer.reset(new ImplementationSelector()); } else { custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull( optimizer_config.name()); diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index f10363b14e..1b7e751801 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -21,7 +21,6 @@ from __future__ import print_function import collections import functools -import re import threading import types as types_lib import weakref @@ -61,13 +60,6 @@ from tensorflow.python.util import tf_inspect FORWARD_FUNCTION_ATTRIBUTE_NAME = "forward_function_name" BACKWARD_FUNCTION_ATTRIBUTE_NAME = "backward_function_name" -# TODO(scottzhu): Update this to allow arbitrary attribute names in future. -WHITELIST_FUNCTION_ATTRIBUTE_REGEX = [ - "experimental_.*", - FORWARD_FUNCTION_ATTRIBUTE_NAME, - BACKWARD_FUNCTION_ATTRIBUTE_NAME -] - CacheKey = collections.namedtuple("CacheKey", [ "input_signature", "parent_graph", "device_functions", "colocation_stack", "uses_xla" @@ -108,12 +100,6 @@ def _parse_func_attrs(attributes): """ attrs = {} for key, value in attributes.items(): - if not any(re.match(reg, key) - for reg in WHITELIST_FUNCTION_ATTRIBUTE_REGEX): - raise ValueError("Attribute name is not whitelisted. " - "Whitelisted: prefix %s, got: %s" % - (WHITELIST_FUNCTION_ATTRIBUTE_REGEX, key)) - if isinstance(value, attr_value_pb2.AttrValue): attrs[key] = value # bool type check has to happen before int since bool is a subclass of int. 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 8ffcd2156c..7fecf80760 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1669,17 +1669,6 @@ class FunctionTest(test.TestCase, parameterized.TestCase): # pylint: enable=protected-access def testFunctionWithInvalidAttribute(self): - @function.defun_with_attributes(attributes={'attr1': 'value1'}) - def matmul(x, y): - return math_ops.matmul(x, y) - - with self.assertRaisesRegexp(ValueError, - '.*Attribute name is not whitelisted.*'): - with context.graph_mode(), self.cached_session(): - with ops.get_default_graph().as_default(): - t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]]) - matmul(t, t) - @function.defun_with_attributes(attributes={'experimental_1': ['value1']}) def add(x, y): return math_ops.add(x, y) @@ -1894,11 +1883,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase): rewrites = rewriter_config_pb2.RewriterConfig() # function_optimizer has to be turn off, otherwise it will delete the # registered function if it does not get called. - # TODO(scottzhu): Move the ExperimentalImplementationSelector to be called + # TODO(scottzhu): Move the ImplementationSelector to be called # before function_optimizer in future. 
rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF customer_optimizer = rewrites.custom_optimizers.add() - customer_optimizer.name = 'ExperimentalImplementationSelector' + customer_optimizer.name = 'ImplementationSelector' rewrites.min_graph_nodes = -1 graph_options = config_pb2.GraphOptions( rewrite_options=rewrites, build_cost_model=1) @@ -1909,16 +1898,16 @@ class FunctionTest(test.TestCase, parameterized.TestCase): @function.defun_with_attributes( attributes={ - 'experimental_api_implements': 'random_boost', - 'experimental_api_preferred_device': 'CPU' + 'api_implements': 'random_boost', + 'api_preferred_device': 'CPU' }) def cpu_boost(x): return math_ops.add(x, 2.0) @function.defun_with_attributes( attributes={ - 'experimental_api_implements': 'random_boost', - 'experimental_api_preferred_device': 'GPU' + 'api_implements': 'random_boost', + 'api_preferred_device': 'GPU' }) def gpu_boost(x): return math_ops.add(x, 4.0) diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py index ff5a37b627..cbda4b62fc 100644 --- a/tensorflow/python/keras/layers/recurrent.py +++ b/tensorflow/python/keras/layers/recurrent.py @@ -49,8 +49,8 @@ from tensorflow.python.util.tf_export import keras_export # The following string constants are used by Defun approach for unified backend # of LSTM and GRU. 
-_DEFUN_API_NAME_ATTRIBUTE = 'experimental_api_implements' -_DEFUN_DEVICE_ATTRIBUTE = 'experimental_api_preferred_device' +_DEFUN_API_NAME_ATTRIBUTE = 'api_implements' +_DEFUN_DEVICE_ATTRIBUTE = 'api_preferred_device' _CPU_DEVICE_NAME = 'CPU' _GPU_DEVICE_NAME = 'GPU' @@ -2173,11 +2173,11 @@ class UnifiedGRU(GRU): recurrent_activation=self.recurrent_activation, time_major=self.time_major) else: - experimental_api_name = 'gru_' + str(uuid.uuid4()) + api_name = 'gru_' + str(uuid.uuid4()) defun_standard_gru = _generate_defun_backend( - experimental_api_name, _CPU_DEVICE_NAME, standard_gru) + api_name, _CPU_DEVICE_NAME, standard_gru) defun_cudnn_gru = _generate_defun_backend( - experimental_api_name, _GPU_DEVICE_NAME, cudnn_gru) + api_name, _GPU_DEVICE_NAME, cudnn_gru) # Call the normal GRU impl and register the CuDNN impl function. The # grappler will kick in during session execution to optimize the graph. last_output, outputs, new_h, runtime = defun_standard_gru( @@ -3125,11 +3125,11 @@ class UnifiedLSTM(LSTM): # identifiable API name, so that Grappler won't get confused when it # sees multiple LSTM layers added into same graph, and it will be able # to pair up the different implementations across them. - experimental_api_name = 'lstm_' + str(uuid.uuid4()) + api_name = 'lstm_' + str(uuid.uuid4()) defun_standard_lstm = _generate_defun_backend( - experimental_api_name, _CPU_DEVICE_NAME, standard_lstm) + api_name, _CPU_DEVICE_NAME, standard_lstm) defun_cudnn_lstm = _generate_defun_backend( - experimental_api_name, _GPU_DEVICE_NAME, cudnn_lstm) + api_name, _GPU_DEVICE_NAME, cudnn_lstm) # Call the normal LSTM impl and register the CuDNN impl function. The # grappler will kick in during session execution to optimize the graph. 
diff --git a/tensorflow/python/keras/layers/unified_gru_test.py b/tensorflow/python/keras/layers/unified_gru_test.py index b25007e52c..b7e644920a 100644 --- a/tensorflow/python/keras/layers/unified_gru_test.py +++ b/tensorflow/python/keras/layers/unified_gru_test.py @@ -49,7 +49,7 @@ from tensorflow.python.training import gradient_descent _rewrites = rewriter_config_pb2.RewriterConfig() _rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF _customer_optimizer = _rewrites.custom_optimizers.add() -_customer_optimizer.name = 'ExperimentalImplementationSelector' +_customer_optimizer.name = 'ImplementationSelector' _rewrites.min_graph_nodes = -1 _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites) _config = config_pb2.ConfigProto(graph_options=_graph_options) diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py index 08153dbd94..adc66ddd84 100644 --- a/tensorflow/python/keras/layers/unified_lstm_test.py +++ b/tensorflow/python/keras/layers/unified_lstm_test.py @@ -49,7 +49,7 @@ from tensorflow.python.training import gradient_descent _rewrites = rewriter_config_pb2.RewriterConfig() _rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF _customer_optimizer = _rewrites.custom_optimizers.add() -_customer_optimizer.name = 'ExperimentalImplementationSelector' +_customer_optimizer.name = 'ImplementationSelector' _rewrites.min_graph_nodes = -1 _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites) _config = config_pb2.ConfigProto(graph_options=_graph_options) -- GitLab From d19902229b131714487def2ec9b1b2c65e0f2899 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 13 Feb 2019 17:17:35 -0800 Subject: [PATCH 097/351] Exposes tf.contrib.summary.initialize in the tf.compat.v1 api, and adds a conversion rule to the upgrade script PiperOrigin-RevId: 233862144 --- tensorflow/python/ops/summary_ops_v2.py | 5 ++++- tensorflow/tools/api/golden/v1/tensorflow.summary.pbtxt | 4 ++++ tensorflow/tools/compatibility/renames_v2.py | 1 + tensorflow/tools/compatibility/tf_upgrade_v2.py | 2 ++ tensorflow/tools/compatibility/tf_upgrade_v2_test.py | 6 ++++++ 5 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/summary_ops_v2.py b/tensorflow/python/ops/summary_ops_v2.py index 835e79bb08..eb2ee7cbd4 100644 --- a/tensorflow/python/ops/summary_ops_v2.py +++ b/tensorflow/python/ops/summary_ops_v2.py @@ -184,16 +184,19 @@ class SummaryWriter(object): return self._close() +@tf_export(v1=["summary.initialize"]) def initialize( graph=None, # pylint: disable=redefined-outer-name session=None): """Initializes summary writing for graph execution mode. + This operation is a no-op when executing eagerly. + This helper method provides a higher-level alternative to using `tf.contrib.summary.summary_writer_initializer_op` and `tf.contrib.summary.graph`. - Most users will also want to call `tf.train.create_global_step` + Most users will also want to call `tf.compat.v1.train.create_global_step` which can happen before or after this function is called. 
Args: diff --git a/tensorflow/tools/api/golden/v1/tensorflow.summary.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.summary.pbtxt index 7ed9cd77a0..3879645d60 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.summary.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.summary.pbtxt @@ -44,6 +44,10 @@ tf_module { name: "image" argspec: "args=[\'name\', \'tensor\', \'max_outputs\', \'collections\', \'family\'], varargs=None, keywords=None, defaults=[\'3\', \'None\', \'None\'], " } + member_method { + name: "initialize" + argspec: "args=[\'graph\', \'session\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "merge" argspec: "args=[\'inputs\', \'collections\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py index a1c47a1efa..32501811a9 100644 --- a/tensorflow/tools/compatibility/renames_v2.py +++ b/tensorflow/tools/compatibility/renames_v2.py @@ -622,6 +622,7 @@ renames = { 'tf.summary.get_summary_description': 'tf.compat.v1.summary.get_summary_description', 'tf.summary.histogram': 'tf.compat.v1.summary.histogram', 'tf.summary.image': 'tf.compat.v1.summary.image', + 'tf.summary.initialize': 'tf.compat.v1.summary.initialize', 'tf.summary.merge': 'tf.compat.v1.summary.merge', 'tf.summary.merge_all': 'tf.compat.v1.summary.merge_all', 'tf.summary.scalar': 'tf.compat.v1.summary.scalar', diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py index f137901947..3345434c86 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py @@ -565,6 +565,8 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): "tf.nn.rnn_cell.RNNCell", "tf.contrib.rnn.LSTMStateTuple": "tf.nn.rnn_cell.LSTMStateTuple", + "tf.contrib.summary.initialize": + "tf.compat.v1.summary.initialize", "tf.contrib.framework.sort": 
"tf.sort", "tf.contrib.framework.argsort": diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py index 236f35911b..db05f71e9c 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py @@ -1201,6 +1201,12 @@ def _log_prob(self, x): _, _, _, new_text = self._upgrade(text) self.assertEqual(expected, new_text) + def test_contrib_initialize(self): + text = "tf.contrib.summary.initialize" + expected = "tf.compat.v1.summary.initialize" + _, _, _, new_text = self._upgrade(text) + self.assertEqual(expected, new_text) + def test_contrib_framework_argsort(self): text = "tf.contrib.framework.argsort" expected = "tf.argsort" -- GitLab From 08c9a5b865429b0daeeb5f1ffdcdbd2623e483e5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 17:24:50 -0800 Subject: [PATCH 098/351] noop PiperOrigin-RevId: 233863131 --- tensorflow/compiler/tf2xla/BUILD | 10 +++++++++- tensorflow/compiler/xla/xla.bzl | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 585ee7d59d..7d9e7b9fc1 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -24,7 +24,7 @@ package( ) load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured") -load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library") +load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library", "xla_py_proto_library") cc_library( name = "tf2xla_supported_ops_lib", @@ -60,6 +60,14 @@ xla_proto_library( ], ) +xla_py_proto_library( + name = "tf2xla_py", + has_services = False, + api_version = 2, + visibility = ["//visibility:public"], + deps = [":tf2xla_proto"], +) + xla_proto_library( name = "host_compute_metadata_proto", srcs = ["host_compute_metadata.proto"], diff --git a/tensorflow/compiler/xla/xla.bzl b/tensorflow/compiler/xla/xla.bzl index 
c743dfd32b..cda2d7c7c6 100644 --- a/tensorflow/compiler/xla/xla.bzl +++ b/tensorflow/compiler/xla/xla.bzl @@ -30,6 +30,11 @@ def xla_proto_library(name, srcs = [], deps = [], visibility = None, testonly = **kwargs ) +def xla_py_proto_library(**kwargs): + # Note: we don't currently define a proto library target for Python in OSS. + _ignore = kwargs + pass + def xla_py_grpc_library(**kwargs): # Note: we don't currently define any special targets for Python GRPC in OSS. _ignore = kwargs -- GitLab From 01cf864bb0d82370c259866c0735c0358e33377c Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Wed, 13 Feb 2019 17:35:57 -0800 Subject: [PATCH 099/351] Remove the old TRT INT8 converson logic. PiperOrigin-RevId: 233864708 --- tensorflow/compiler/tf2tensorrt/BUILD | 7 +- .../tf2tensorrt/convert/convert_graph.cc | 50 ----------- .../tf2tensorrt/convert/convert_graph.h | 6 -- .../tf2tensorrt/convert/convert_nodes.cc | 1 - .../tf2tensorrt/kernels/trt_engine_op.cc | 22 ----- .../compiler/tf2tensorrt/utils/test_utils.cc | 2 + .../compiler/tf2tensorrt/utils/test_utils.h | 3 +- .../tf2tensorrt/utils/trt_resource_manager.cc | 45 ---------- .../tf2tensorrt/utils/trt_resource_manager.h | 45 ---------- .../python/compiler/tensorrt/trt_conversion.i | 89 +------------------ .../python/compiler/tensorrt/trt_convert.py | 52 +---------- 11 files changed, 8 insertions(+), 314 deletions(-) delete mode 100644 tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.cc delete mode 100644 tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h diff --git a/tensorflow/compiler/tf2tensorrt/BUILD b/tensorflow/compiler/tf2tensorrt/BUILD index 7466aea4c9..63cad6a159 100644 --- a/tensorflow/compiler/tf2tensorrt/BUILD +++ b/tensorflow/compiler/tf2tensorrt/BUILD @@ -171,13 +171,11 @@ tf_cuda_library( name = "trt_resources", srcs = [ "utils/trt_int8_calibrator.cc", - "utils/trt_resource_manager.cc", "utils/trt_resources.cc", ], hdrs = [ "utils/trt_int8_calibrator.h", "utils/trt_lru_cache.h", - 
"utils/trt_resource_manager.h", "utils/trt_resources.h", ], deps = [ @@ -266,7 +264,6 @@ tf_cuda_library( "//tensorflow/core:framework_lite", "//tensorflow/core:gpu_runtime", "//tensorflow/core:graph", - "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:devices", @@ -433,7 +430,7 @@ cc_library( copts = tf_copts(), deps = [ "//tensorflow/core:framework", - "//tensorflow/core:lib", + "//tensorflow/core:lib_proto_parsing", ], ) @@ -442,7 +439,7 @@ cc_library( srcs = ["utils/test_utils.cc"], hdrs = ["utils/test_utils.h"], deps = [ - "//tensorflow/core:lib", + "//tensorflow/core:lib_proto_parsing", "@com_googlesource_code_re2//:re2", ], ) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc index d6080c02d4..beb87b6c24 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc @@ -30,7 +30,6 @@ limitations under the License. #include "tensorflow/compiler/tf2tensorrt/plugin/trt_plugin_factory.h" #include "tensorflow/compiler/tf2tensorrt/segment/segment.h" #include "tensorflow/compiler/tf2tensorrt/utils/test_utils.h" -#include "tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h" #include "tensorflow/compiler/tf2tensorrt/utils/trt_resources.h" #include "tensorflow/core/common_runtime/gpu/gpu_id.h" #include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" @@ -190,55 +189,6 @@ tensorflow::Status BuildNodeMap( } // namespace -// Function to get calibration from ResourceMgr and put them into nodedef. 
-tensorflow::Status ConvertCalibGraphToInferGraph( - const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph, - bool is_dyn_op) { - LOG(INFO) << "Starting Calib Conversion"; - *infer_graph = graph_def; - auto trt_rm = TRTResourceManager::instance(); - auto calib_rm = trt_rm->getManager("TRTCalibration"); - int num_nodes = infer_graph->node_size(); - if (!is_dyn_op) { - LOG(WARNING) << "Construction of static int8 engine is not implemented " - "yet!. Dynamic engine will be constructed"; - } - for (int i = 0; i < num_nodes; ++i) { - auto n = infer_graph->mutable_node(i); - if (n->op() == "TRTEngineOp") { - VLOG(1) << "Processing " << n->name(); - const string& container_name = n->attr().at("segment_funcdef_name").s(); - TRTCalibrationResource* cres = nullptr; - auto status = calib_rm->Lookup(container_name, "Calibrator", &cres); - if (!status.ok()) { - LOG(ERROR) << "Could not get Calibration information. Did you run with " - "calibration data?"; - return tensorflow::errors::FailedPrecondition( - "Need to run graph with calibration data first!"); - } - tensorflow::core::ScopedUnref calib_sc(cres); - if (cres->calibrator_) { - cres->calibrator_->waitAndSetDone(); - cres->thr_->join(); - const auto& calibration_table = - cres->calibrator_->getCalibrationTableAsString(); - if (calibration_table.empty()) { - LOG(ERROR) << "Calibration table is empty"; - return tensorflow::errors::Unknown( - "Calibration table is missing. 
This shouldn't have happened!"); - } - n->mutable_attr()->at("calibration_data").set_s(calibration_table); - } else { - LOG(ERROR) << "Can't get TRTCalibrator from resource manager!"; - return tensorflow::errors::Unknown( - "Can't get TRTCalibrator from resource manager!"); - } - TF_RETURN_IF_ERROR(calib_rm->Cleanup(container_name)); - } - } - return tensorflow::Status::OK(); -} - tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.h b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.h index 95cf0227dc..80f68d36a3 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.h @@ -85,12 +85,6 @@ struct ConversionParams { std::vector cached_engine_batches; // list of cached engines }; -// This method extracts calibration information from the resource managers -// and puts them in to engine nodedefs. -tensorflow::Status ConvertCalibGraphToInferGraph( - const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def, - bool is_dyn_op); - // - max_batch_size: maximum batch size which can be used for inference for // optimization targets inference run with max batch size. // - max_workspace_size_bytes: The upper bound of memory allowance for engine diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 0d5b9851f7..cce937a253 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -30,7 +30,6 @@ limitations under the License. 
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h" #include "tensorflow/compiler/tf2tensorrt/plugin/trt_plugin_factory.h" #include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h" -#include "tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h" #include "tensorflow/compiler/tf2tensorrt/utils/trt_resources.h" #include "tensorflow/core/framework/node_def.pb.h" // NOLINT #include "tensorflow/core/framework/node_def_builder.h" diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc index e3b31d736e..f6d387c59c 100644 --- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h" #include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h" #include "tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h" -#include "tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h" #include "tensorflow/compiler/tf2tensorrt/utils/trt_resources.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph_to_functiondef.h" @@ -295,27 +294,6 @@ void TRTEngineOp::ExecuteCalibration(OpKernelContext* ctx, return this->AllocateCalibrationResources(ctx, cr); }})); tensorflow::core::ScopedUnref calib_sc(calib_res); - // TODO(aaroey): here we also add the resource to the ResourceMgr singleton. - // This is needed before we migrate all uses of calib_graph_to_infer_graph() - // to the new calibration workflow. After that we'll remove this block. - { - auto deprecated_rm = - TRTResourceManager::instance()->getManager("TRTCalibration"); - TRTCalibrationResource* copied_resource = nullptr; - // Check whether the resource exists, and create it if not. - if (deprecated_rm->Lookup(funcdef_name_, "Calibrator", &copied_resource) - .ok()) { - // Do nothing if the resource exists. 
- copied_resource->Unref(); - } else { - copied_resource = calib_res; - // Increase the refcount by 1 then transfer the ownership of that refcount - // to the ResourceMgr singleton. - copied_resource->Ref(); - OP_REQUIRES_OK(ctx, deprecated_rm->Create(funcdef_name_, "Calibrator", - copied_resource)); - } - } int num_inputs = ctx->num_inputs(); // Pass input data to calibrator std::unordered_map input_data; diff --git a/tensorflow/compiler/tf2tensorrt/utils/test_utils.cc b/tensorflow/compiler/tf2tensorrt/utils/test_utils.cc index 3bcca99afb..dd3c09d7e4 100644 --- a/tensorflow/compiler/tf2tensorrt/utils/test_utils.cc +++ b/tensorflow/compiler/tf2tensorrt/utils/test_utils.cc @@ -19,7 +19,9 @@ limitations under the License. #include #include "re2/re2.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace tensorrt { diff --git a/tensorflow/compiler/tf2tensorrt/utils/test_utils.h b/tensorflow/compiler/tf2tensorrt/utils/test_utils.h index bcd628b62f..d85875991b 100644 --- a/tensorflow/compiler/tf2tensorrt/utils/test_utils.h +++ b/tensorflow/compiler/tf2tensorrt/utils/test_utils.h @@ -16,8 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_TF2TENSORRT_UTILS_TEST_UTILS_H_ #define TENSORFLOW_COMPILER_TF2TENSORRT_UTILS_TEST_UTILS_H_ -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/types.h" namespace tensorflow { namespace tensorrt { diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.cc deleted file mode 100644 index 0a72a88bc7..0000000000 --- a/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h" -#include "tensorflow/core/platform/logging.h" - -namespace tensorflow { -namespace tensorrt { - -std::shared_ptr -tensorflow::tensorrt::TRTResourceManager::instance() { - static std::shared_ptr instance_(new TRTResourceManager); - return instance_; -} - -std::shared_ptr -tensorflow::tensorrt::TRTResourceManager::getManager(const string& op_name) { - // mutex is held for lookup only. Most instantiations where mutex will be held - // longer will be during op creation and should be ok. - tensorflow::mutex_lock lock(map_mutex_); - auto s = managers_.find(op_name); - if (s == managers_.end()) { - auto it = managers_.emplace( - op_name, std::make_shared(op_name)); - VLOG(1) << "Returning a new manager " << op_name; - return it.first->second; - } - VLOG(1) << "Returning old manager " << op_name; - return s->second; -} - -} // namespace tensorrt -} // namespace tensorflow diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h b/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h deleted file mode 100644 index 03879ffff2..0000000000 --- a/tensorflow/compiler/tf2tensorrt/utils/trt_resource_manager.h +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_COMPILER_TF2TENSORRT_UTILS_TRT_RESOURCE_MANAGER_H_ -#define TENSORFLOW_COMPILER_TF2TENSORRT_UTILS_TRT_RESOURCE_MANAGER_H_ -#include - -#include -#include -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/platform/mutex.h" - -namespace tensorflow { -namespace tensorrt { - -class TRTResourceManager { - TRTResourceManager() = default; - - public: - static std::shared_ptr instance(); - // returns a manager for given op, if it doesn't exists it creates one - std::shared_ptr getManager(const string& op_name); - - private: - std::unordered_map> - managers_; - tensorflow::mutex map_mutex_; -}; - -} // namespace tensorrt -} // namespace tensorflow - -#endif // TENSORFLOW_COMPILER_TF2TENSORRT_UTILS_TRT_RESOURCE_MANAGER_H_ diff --git a/tensorflow/python/compiler/tensorrt/trt_conversion.i b/tensorflow/python/compiler/tensorrt/trt_conversion.i index c12895c730..35a6fa137d 100644 --- a/tensorflow/python/compiler/tensorrt/trt_conversion.i +++ b/tensorflow/python/compiler/tensorrt/trt_conversion.i @@ -17,38 +17,10 @@ limitations under the License. 
%{ #define SWIG_FILE_WITH_INIT %} -%include "std_pair.i" +%include "std_string.i" %include "tensorflow/python/platform/base.i" %{ -PyObject* pair_helper(std::pair* in) { - PyObject *first(nullptr), *second(nullptr), *tuple(nullptr); - first = PyBytes_FromStringAndSize(in->first.data(), in->first.length()); - if (!first) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, "Pair conversion first argument failed"); - } - return NULL; - } - second = PyBytes_FromStringAndSize(in->second.data(), in->second.length()); - if (!second) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, - "Pair conversion second argument failed"); - } - return NULL; - } - tuple = Py_BuildValue("(OO)", first, second); - if (!tuple) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_TypeError, - "Tuple creation from pair failed!"); - } - return NULL; - } - return tuple; -} - struct version_struct{ int vmajor; int vminor; @@ -67,6 +39,7 @@ PyObject* version_helper(version_struct* in) { } return tuple; } + /* Define converters for vector */ template<> bool _PyObjAs(PyObject *pyobj, int* dest) { @@ -83,12 +56,6 @@ PyObject *_PyObjFrom(const int& src) { _LIST_OUTPUT_TYPEMAP(int, PyLong_FromLong); -%typemap(out) std::pair { - PyObject *tuple = pair_helper(&$1); - if (!tuple) SWIG_fail; - $result = tuple; -} - %typemap(out) version_struct { PyObject *tuple = version_helper(&$1); if (!tuple) SWIG_fail; @@ -96,9 +63,6 @@ _LIST_OUTPUT_TYPEMAP(int, PyLong_FromLong); } %{ -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/util/stat_summarizer.h" #include "tensorflow/compiler/tf2tensorrt/convert/convert_graph.h" #include "tensorflow/compiler/tf2tensorrt/convert/utils.h" #include "tensorflow/compiler/tf2tensorrt/utils/test_utils.h" @@ -106,7 +70,6 @@ _LIST_OUTPUT_TYPEMAP(int, PyLong_FromLong); %ignoreall %unignore tensorflow; -%unignore calib_convert; %unignore get_linked_tensorrt_version; %unignore 
get_loaded_tensorrt_version; %unignore is_tensorrt_enabled; @@ -117,52 +80,6 @@ _LIST_OUTPUT_TYPEMAP(int, PyLong_FromLong); %{ -std::pair calib_convert( - string graph_def_string, bool is_dyn_op - // unfortunately we can't use TF_Status here since it - // is in c/c_api and brings in a lot of other libraries - // which in turn declare ops. These ops are included - // statically in our library and cause an abort when - // module is loaded due to double registration - // until Tensorflow properly exposes these headers - // we have to work around this by returning a string - // and converting it to exception on python side. - //,TF_Status* out_status) { -) { -#if GOOGLE_CUDA && GOOGLE_TENSORRT - string out_status; - - tensorflow::GraphDef graph_def; - if (!graph_def.ParseFromString(graph_def_string)) { - out_status = "InvalidArgument;Couldn't interpret input as a GraphDef"; - return std::pair{out_status, ""}; - } - graph_def_string.resize(0); - tensorflow::GraphDef out_graph; - tensorflow::Status conversion_status = - tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph( - graph_def, &out_graph, is_dyn_op); - if (!conversion_status.ok()) { - auto retCode = (int)conversion_status.code(); - char buff[2000]; - snprintf(buff, 2000, "%d;%s", retCode, - conversion_status.error_message().c_str()); - out_status = buff; - return std::pair{out_status, ""}; - } - string result; - if (!out_graph.SerializeToString(&result)) { - out_status = "InvalidArgument;Couldn't serialize output as a GraphDef"; - return std::pair{out_status, ""}; - } - out_status = "OK;All good!"; - return std::pair{out_status, result}; -#else - // Returns FAILED_PRECONDITION. - return std::pair{"9;TensorRT is not enabled!", ""}; -#endif // GOOGLE_CUDA && GOOGLE_TENSORRT -} - version_struct get_linked_tensorrt_version() { // Return the version at the link time. 
version_struct s; @@ -221,8 +138,6 @@ PyObject* get_test_value(PyObject* label) { %} -std::pair calib_convert( - string graph_def_string, bool is_dyn_op); version_struct get_linked_tensorrt_version(); version_struct get_loaded_tensorrt_version(); bool is_tensorrt_enabled(); diff --git a/tensorflow/python/compiler/tensorrt/trt_convert.py b/tensorflow/python/compiler/tensorrt/trt_convert.py index d3ed4644cc..0caa2bfaf3 100644 --- a/tensorflow/python/compiler/tensorrt/trt_convert.py +++ b/tensorflow/python/compiler/tensorrt/trt_convert.py @@ -20,13 +20,11 @@ from __future__ import print_function import six as _six from tensorflow.compiler.tf2tensorrt.python.ops import trt_ops -from tensorflow.core.framework import graph_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import meta_graph_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.client import session from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors_impl as _impl from tensorflow.python.framework import graph_util from tensorflow.python.framework import importer from tensorflow.python.framework import ops @@ -428,7 +426,7 @@ class TrtGraphConverter(GraphConverter): trt_ops.load_trt_ops() # pylint: disable=g-import-not-at-top,unused-import,line-too-long,unused-variable # Import a random symbol to trigger loading of TRT library. 
- from tensorflow.python.compiler.tensorrt.wrap_conversion import calib_convert + from tensorflow.python.compiler.tensorrt.wrap_conversion import get_linked_tensorrt_version # pylint: enable=g-import-not-at-top,unused-import,line-too-long,unused-variable if rewriter_config_template is not None and not isinstance( @@ -768,51 +766,3 @@ def create_inference_graph( if output_saved_model_dir: trt_converter.save(output_saved_model_dir) return converted_graph_def - - -def calib_graph_to_infer_graph(calibration_graph_def, is_dynamic_op=False): - """Convert an existing calibration graph to inference graph. - - Args: - calibration_graph_def: the calibration GraphDef object with calibration data - is_dynamic_op: whether to create dynamic static engines from calibration - - Returns: - New GraphDef with TRTEngineOps placed in graph replacing calibration nodes. - Raises: - RuntimeError: if the returned status message is malformed. - """ - # Lazily load the TF-TRT C bindings, so `import tensorflow` doesn't complain - # even if it cannot find TensorRT library. - trt_ops.load_trt_ops() - # pylint: disable=g-import-not-at-top,line-too-long - from tensorflow.python.compiler.tensorrt.wrap_conversion import calib_convert - # pylint: enable=g-import-not-at-top,line-too-long - - is_calib_graph = False - for n in calibration_graph_def.node: - if n.op == "TRTEngineOp": - is_calib_graph = is_calib_graph or not n.attr["calibration_data"].s - if not is_calib_graph: - tf_logging.error( - "Not a calib graph. 
Doesn't seem to contain any calibration nodes.") - return None - graph_str = calibration_graph_def.SerializeToString() - out = calib_convert(graph_str, is_dynamic_op) - status = _to_string(out[0]) - output_graph_def_string = out[1] - del graph_str # Save some memory - if len(status) < 2: - raise _impl.UnknownError(None, None, status) - if status[:2] != "OK": - msg = status.split(";") - if len(msg) == 1: - raise RuntimeError("Status message is malformed {}".format(status)) - # pylint: disable=protected-access - raise _impl._make_specific_exception(None, None, ";".join(msg[1:]), - int(msg[0])) - # pylint: enable=protected-access - output_graph_def = graph_pb2.GraphDef() - output_graph_def.ParseFromString(output_graph_def_string) - del output_graph_def_string # Save some memory - return output_graph_def -- GitLab From 2b5c02c23ae93fe72c5a5f14ee750e4e16a7ee2f Mon Sep 17 00:00:00 2001 From: Thomas O'Malley Date: Wed, 13 Feb 2019 17:54:18 -0800 Subject: [PATCH 100/351] Change how TensorFlowOpLayer prevents sublayers from making Keras history. Instead of marking input tensors as `_keras_history_checked`, create a call_context object to determine if we are in a sublayer. Only set `_keras_history_checked` for tensors that don't come from keras inputs. PiperOrigin-RevId: 233867280 --- tensorflow/python/keras/engine/base_layer.py | 117 +++++++++--------- .../python/keras/engine/base_layer_utils.py | 22 +++- .../keras/layers/tensorflow_op_layer_test.py | 35 +++++- 3 files changed, 113 insertions(+), 61 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index cdd36ffcc2..a68230a2f3 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -535,8 +535,6 @@ class Layer(checkpointable.Checkpointable): # framework. 
if base_layer_utils.needs_keras_history(inputs): base_layer_utils.create_keras_history(inputs) - # Do not track these Tensors in any sublayers invoked during `call`. - base_layer_utils.mark_checked(inputs) # Handle Keras mask propagation from previous layer to current layer. previous_mask = None @@ -551,62 +549,65 @@ class Layer(checkpointable.Checkpointable): # pass to __call__, hence we set previous_mask as the default value. kwargs['mask'] = previous_mask - # Check input assumptions set after layer building, e.g. input shape. - if build_graph: - # Symbolic execution on symbolic tensors. We will attempt to build - # the corresponding TF subgraph inside `backend.get_graph()` - input_spec.assert_input_compatibility(self.input_spec, inputs, self.name) - graph = backend.get_graph() - with graph.as_default(), ops.name_scope(self._name_scope()): - # Build layer if applicable (if the `build` method has been overridden). - self._maybe_build(inputs) - if not self.dynamic: - try: - outputs = self.call(inputs, *args, **kwargs) - except TypeError as e: - messages = ('`tf.Tensor` as a Python `bool` is not allowed', - 'Tensor objects are only iterable when eager') - exception_str = str(e) - for msg in messages: - if msg in exception_str: - raise TypeError('You are attempting to use Python control ' - 'flow in a layer that was not declared to be ' - 'dynamic. Pass `dynamic=True` to the class ' - 'constructor.\nEncountered error:\n"""\n' + - exception_str + '\n"""') - raise - else: - # We will use static shape inference to return symbolic tensors - # matching the specifications of the layer outputs. - # Since `self.dynamic` is True, we will never attempt to - # run the underlying TF graph (which is disconnected). - # TODO(fchollet): consider py_func as an alternative, which - # would enable us to run the underlying graph if needed. 
- outputs = self._symbolic_call(inputs) - - if outputs is None: - raise ValueError('A layer\'s `call` method should return a ' - 'Tensor or a list of Tensors, not None ' - '(layer: ' + self.name + ').') - if base_layer_utils.have_all_keras_metadata(inputs): - inputs, outputs = self._set_connectivity_metadata_( - inputs, outputs, args, kwargs) - self._handle_activity_regularization(inputs, outputs) - self._set_mask_metadata(inputs, outputs, previous_mask) - if hasattr(self, '_set_inputs') and not self.inputs: - # Subclassed network: explicitly set metadata normally set by - # a call to self._set_inputs(). - # TODO(b/120997007): This should be done in Eager as well, but - # causes garbage collection issues because of the placeholders - # created on the default Keras graph. - self._set_inputs(inputs, outputs) - else: - # Eager execution on data tensors. - with ops.name_scope(self._name_scope()): - self._maybe_build(inputs) - outputs = self.call(inputs, *args, **kwargs) - self._handle_activity_regularization(inputs, outputs) - self._set_mask_metadata(inputs, outputs, previous_mask) + with base_layer_utils.call_context(): + # Check input assumptions set after layer building, e.g. input shape. + if build_graph: + # Symbolic execution on symbolic tensors. We will attempt to build + # the corresponding TF subgraph inside `backend.get_graph()` + input_spec.assert_input_compatibility(self.input_spec, inputs, + self.name) + graph = backend.get_graph() + with graph.as_default(), ops.name_scope(self._name_scope()): + # Build layer if applicable (if the `build` method has been + # overridden). 
+ self._maybe_build(inputs) + if not self.dynamic: + try: + outputs = self.call(inputs, *args, **kwargs) + except TypeError as e: + messages = ('`tf.Tensor` as a Python `bool` is not allowed', + 'Tensor objects are only iterable when eager') + exception_str = str(e) + for msg in messages: + if msg in exception_str: + raise TypeError('You are attempting to use Python control ' + 'flow in a layer that was not declared to be ' + 'dynamic. Pass `dynamic=True` to the class ' + 'constructor.\nEncountered error:\n"""\n' + + exception_str + '\n"""') + raise + else: + # We will use static shape inference to return symbolic tensors + # matching the specifications of the layer outputs. + # Since `self.dynamic` is True, we will never attempt to + # run the underlying TF graph (which is disconnected). + # TODO(fchollet): consider py_func as an alternative, which + # would enable us to run the underlying graph if needed. + outputs = self._symbolic_call(inputs) + + if outputs is None: + raise ValueError('A layer\'s `call` method should return a ' + 'Tensor or a list of Tensors, not None ' + '(layer: ' + self.name + ').') + if base_layer_utils.have_all_keras_metadata(inputs): + inputs, outputs = self._set_connectivity_metadata_( + inputs, outputs, args, kwargs) + self._handle_activity_regularization(inputs, outputs) + self._set_mask_metadata(inputs, outputs, previous_mask) + if hasattr(self, '_set_inputs') and not self.inputs: + # Subclassed network: explicitly set metadata normally set by + # a call to self._set_inputs(). + # TODO(b/120997007): This should be done in Eager as well, but + # causes garbage collection issues because of the placeholders + # created on the default Keras graph. + self._set_inputs(inputs, outputs) + else: + # Eager execution on data tensors. 
+ with ops.name_scope(self._name_scope()): + self._maybe_build(inputs) + outputs = self.call(inputs, *args, **kwargs) + self._handle_activity_regularization(inputs, outputs) + self._set_mask_metadata(inputs, outputs, previous_mask) if not context.executing_eagerly(): # Optionally load weight values specified at layer instantiation. diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index a9fd17bcf8..40ac121738 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function import collections as collections_lib +import threading import enum from tensorflow.python.framework import dtypes @@ -28,6 +29,9 @@ from tensorflow.python.ops import init_ops from tensorflow.python.ops import init_ops_v2 from tensorflow.python.ops import variables as tf_variables from tensorflow.python.util import nest +from tensorflow.python.util import tf_contextlib + +_call_context = threading.local() class CallConvention(enum.Enum): @@ -292,6 +296,11 @@ def _create_keras_history_helper(tensors, processed_ops=None): def needs_keras_history(tensors): """Check if any Tensors need to be wrapped in TensorFlowOpLayers. + This will never return True inside a sublayer, because sublayers + do not need to create Keras History. Otherwise, this returns True + if one or more of `tensors` originates from a `keras.Input` and + does not have `_keras_history` set. + Arguments: tensors: An arbitrary nested structure of Tensors. @@ -299,7 +308,7 @@ def needs_keras_history(tensors): Bool, whether at least one Tensor needs to be wrapped. """ input_tensors = nest.flatten(tensors) - if all( + if getattr(_call_context, 'in_call', False) or all( getattr(tensor, '_keras_history', None) is not None for tensor in input_tensors): # KerasHistory already set. 
@@ -362,3 +371,14 @@ def mark_checked(tensors): tensor._keras_history_checked = True # pylint: disable=protected-access nest.map_structure(_mark_checked, tensors) + + +@tf_contextlib.contextmanager +def call_context(): + """Scope that marks when we are currently inside a Layer/Model's `call`.""" + was_in_call = getattr(_call_context, 'in_call', False) + _call_context.in_call = True + try: + yield + finally: + _call_context.in_call = was_in_call diff --git a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py index 4cdb3dfef9..77496ba4a6 100644 --- a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py +++ b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py @@ -102,6 +102,35 @@ def _add_n(): return inputs, outputs +def _reuse_op(): + inputs = keras.Input(shape=(10,)) + # This op needs to be checked multiple times. + x = gen_nn_ops.relu(inputs) + y = keras.layers.Dense(10)(x) + x2 = x * 2 + y2 = keras.layers.Dense(10)(x2) + outputs = y + y2 + return inputs, outputs + + +class LayerWithLayer(keras.layers.Layer): + + def build(self, input_shape): + self.bias = self.add_weight(name='bias', dtype='float32') + self.layer = keras.layers.Dense(10) + + def call(self, inputs): + inputs = inputs * self.bias + # Would throw an error if Keras History was created here. 
+ return self.layer(inputs) + + +def _inner_layer(): + inputs = keras.Input(shape=(10,)) + outputs = LayerWithLayer()(inputs) + return inputs, outputs + + @keras_parameterized.run_all_keras_modes class AutoLambdaTest(keras_parameterized.TestCase): @@ -113,7 +142,8 @@ class AutoLambdaTest(keras_parameterized.TestCase): ('single_standalone_branch', _single_standalone_branch), ('single_op_with_attrs', _single_op_with_attrs), ('multiple_uses', _multiple_uses), - ('op_with_tensor_list', _op_with_tensor_list), ('add_n', _add_n)) + ('op_with_tensor_list', _op_with_tensor_list), ('add_n', _add_n), + ('_reuse_op', _reuse_op), ('_inner_layer', _inner_layer)) def test_autolambda(self, model_fn): inputs, outputs = model_fn() model = keras.Model(inputs, outputs) @@ -127,7 +157,8 @@ class AutoLambdaTest(keras_parameterized.TestCase): model.fit(np_inputs, np_outputs, batch_size=2) model(np_inputs) # Test calling the model directly on inputs. - new_model = keras.Model.from_config(model.get_config()) + new_model = keras.Model.from_config( + model.get_config(), custom_objects={'LayerWithLayer': LayerWithLayer}) new_model.compile( adam.Adam(0.001), 'mse', run_eagerly=testing_utils.should_run_eagerly()) new_model.fit(np_inputs, np_outputs, batch_size=2) -- GitLab From 616a81ceecbe09a08fc249eb8bc1ba23c8c33937 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 17:56:31 -0800 Subject: [PATCH 101/351] Extended UnrollBatchMatMul to support tensor with dimension greater than 3. Augmented UnrollBatchMatMul to handle transpose_[ab]. 
PiperOrigin-RevId: 233867480 --- tensorflow/lite/testing/generate_examples.py | 18 +- .../reorder_reshape_transpose.cc | 1 + .../unroll_batch_matmul.cc | 319 ++++++++++++------ tensorflow/lite/toco/import_tensorflow.cc | 11 +- tensorflow/lite/toco/model.h | 2 + 5 files changed, 249 insertions(+), 102 deletions(-) diff --git a/tensorflow/lite/testing/generate_examples.py b/tensorflow/lite/testing/generate_examples.py index 215baa6449..fd1a60a8fc 100644 --- a/tensorflow/lite/testing/generate_examples.py +++ b/tensorflow/lite/testing/generate_examples.py @@ -4064,7 +4064,17 @@ def make_mirror_pad_tests(zip_path): def make_unroll_batch_matmul_tests(zip_path): """Make a set of tests to test unroll_batch_matmul.""" - test_parameters = [{"dtype": [tf.float32], "shape": [[(2, 2, 3), (2, 3, 2)]]}] + test_parameters = [{ + "dtype": [tf.float32], + "shape": [[(2, 2, 3), (2, 3, 2), False, False], + [(2, 2, 3), (2, 3, 2), True, True], + [(2, 2, 3), (2, 2, 3), False, True], + [(2, 2, 3), (2, 2, 3), True, False], + [(4, 2, 2, 3), (4, 2, 3, 2), False, False], + [(4, 2, 2, 3), (4, 2, 3, 2), True, True], + [(4, 2, 2, 3), (4, 2, 2, 3), False, True], + [(4, 2, 2, 3), (4, 2, 2, 3), True, False]] + }] def build_graph(parameters): """Build the batch_matmul op testing graph.""" @@ -4073,7 +4083,11 @@ def make_unroll_batch_matmul_tests(zip_path): input_tensor2 = tf.placeholder( dtype=parameters["dtype"], shape=parameters["shape"][1]) # Should be unrolled and replaced with fully_connected ops in the end. 
- out = tf.matmul(input_tensor1, input_tensor2) + out = tf.matmul( + input_tensor1, + input_tensor2, + transpose_a=parameters["shape"][2], + transpose_b=parameters["shape"][3]) return [input_tensor1, input_tensor2], [out] def build_inputs(parameters, sess, inputs, outputs): diff --git a/tensorflow/lite/toco/graph_transformations/reorder_reshape_transpose.cc b/tensorflow/lite/toco/graph_transformations/reorder_reshape_transpose.cc index fdd411c84c..77803d580e 100644 --- a/tensorflow/lite/toco/graph_transformations/reorder_reshape_transpose.cc +++ b/tensorflow/lite/toco/graph_transformations/reorder_reshape_transpose.cc @@ -218,6 +218,7 @@ std::vector ComputeNewPerm(std::vector input_dims, CHECK_EQ(input_dims.size(), new_perm.size()); auto& transpose_array = model->GetOrCreateArray(transpose_op->inputs[1]); + transpose_array.data_type = ArrayDataType::kInt32; transpose_array.GetMutableBuffer().data = new_perm; *(transpose_array.mutable_shape()->mutable_dims()) = { static_cast(new_perm.size())}; diff --git a/tensorflow/lite/toco/graph_transformations/unroll_batch_matmul.cc b/tensorflow/lite/toco/graph_transformations/unroll_batch_matmul.cc index 41a735394d..7492f3e116 100644 --- a/tensorflow/lite/toco/graph_transformations/unroll_batch_matmul.cc +++ b/tensorflow/lite/toco/graph_transformations/unroll_batch_matmul.cc @@ -13,17 +13,192 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ #include +#include #include #include #include +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/lite/toco/graph_transformations/graph_transformations.h" #include "tensorflow/lite/toco/model.h" #include "tensorflow/lite/toco/tooling_util.h" -#include "tensorflow/core/platform/logging.h" namespace toco { +namespace { + +void UnrollBatchMatMul3D( + const string& input_lhs, const string& input_rhs, + const BatchMatMulOperator* batch_op, const std::vector batch, + Model* model, std::vector>::iterator* tail_it, + std::vector* pack_inputs) { + const std::string batch_name = + absl::StrCat(batch_op->outputs[0], "_b", absl::StrJoin(batch, "-")); + const auto& input_array_a = model->GetArray(input_lhs); + const auto& input_array_b = model->GetArray(input_rhs); + const int dims_count = input_array_a.shape().dimensions_count(); + + // tf.slice(a, ...). + std::vector begin_indices_a = batch; + begin_indices_a.resize(dims_count); + std::vector slice_size_a = input_array_a.shape().dims(); + for (int i = 0; i < batch.size(); ++i) { + slice_size_a[i] = 1; + } + auto* slice_a_op = new SliceOperator; + slice_a_op->inputs = { + input_lhs, + CreateInt32Array(model, batch_name + "/slice_a/slice/begin", + begin_indices_a), + CreateInt32Array(model, batch_name + "/slice_a/slice/size", slice_size_a), + }; + slice_a_op->outputs = {AvailableArrayName(*model, batch_name + "/slice_a")}; + auto& slice_a_op_output = model->GetOrCreateArray(slice_a_op->outputs[0]); + slice_a_op_output.data_type = input_array_a.data_type; + *tail_it = model->operators.emplace(*tail_it, slice_a_op) + 1; + + // Reshape to remove the first dimension ([1,M,N] -> [M,N]). 
+ auto* slice_a_reshape_op = new TensorFlowReshapeOperator; + slice_a_reshape_op->inputs = { + slice_a_op->outputs[0], + CreateInt32Array(model, batch_name + "/slice_a/reshape/shape", + {-1, input_array_a.shape().dims(dims_count - 1)})}; + slice_a_reshape_op->outputs = { + AvailableArrayName(*model, batch_name + "/slice_a/reshape")}; + auto& slice_a_reshape_op_output = + model->GetOrCreateArray(slice_a_reshape_op->outputs[0]); + slice_a_reshape_op_output.data_type = input_array_a.data_type; + *tail_it = model->operators.emplace(*tail_it, slice_a_reshape_op) + 1; + + // tf.slice(b, ...). + std::vector begin_indices_b = batch; + begin_indices_b.resize(dims_count); + std::vector slice_size_b = input_array_b.shape().dims(); + for (int i = 0; i < batch.size(); ++i) { + slice_size_b[i] = 1; + } + auto* slice_b_op = new SliceOperator; + slice_b_op->inputs = { + input_rhs, + CreateInt32Array(model, batch_name + "/slice_b/slice/begin", + begin_indices_b), + CreateInt32Array(model, batch_name + "/slice_b/slice/size", slice_size_b), + }; + slice_b_op->outputs = {AvailableArrayName(*model, batch_name + "/slice_b")}; + auto& slice_b_op_output = model->GetOrCreateArray(slice_b_op->outputs[0]); + slice_b_op_output.data_type = input_array_b.data_type; + *tail_it = model->operators.emplace(*tail_it, slice_b_op) + 1; + + // Reshape to remove the first dimension ([1,M,N] -> [M,N]). + auto* slice_b_reshape_op = new TensorFlowReshapeOperator; + slice_b_reshape_op->inputs = { + slice_b_op->outputs[0], + CreateInt32Array(model, batch_name + "/slice_b/reshape/shape", + {-1, input_array_b.shape().dims(dims_count - 1)})}; + slice_b_reshape_op->outputs = { + AvailableArrayName(*model, batch_name + "/slice_b/reshape")}; + auto& slice_b_reshape_op_output = + model->GetOrCreateArray(slice_b_reshape_op->outputs[0]); + slice_b_reshape_op_output.data_type = input_array_b.data_type; + *tail_it = model->operators.emplace(*tail_it, slice_b_reshape_op) + 1; + + // tf.matmul(slice_a, slice_b). 
+ auto* matmul_op = new TensorFlowMatMulOperator; + matmul_op->inputs = {slice_a_reshape_op->outputs[0], + slice_b_reshape_op->outputs[0]}; + matmul_op->outputs = {AvailableArrayName(*model, batch_name)}; + auto& matmul_op_output = model->GetOrCreateArray(matmul_op->outputs[0]); + matmul_op_output.data_type = input_array_a.data_type; + *tail_it = model->operators.emplace(*tail_it, matmul_op) + 1; + + // Add to stack. + pack_inputs->push_back(matmul_op->outputs[0]); +} + +std::vector UnrollBatchMatMulRecursion( + const string& input_lhs, const string& input_rhs, + const BatchMatMulOperator* batch_op, Model* model, + std::vector>::iterator* tail_it, + const std::vector& batch_prefix) { + const auto& input_array_a = model->GetArray(input_lhs); + const auto& dims_vec = input_array_a.shape().dims(); + const int current_dim_size = dims_vec[batch_prefix.size()]; + std::vector batch_pack_inputs; + + if (batch_prefix.size() + 3 == dims_vec.size()) { + // Base case + for (int batch = 0; batch < current_dim_size; ++batch) { + std::vector new_batch_prefix = batch_prefix; + new_batch_prefix.emplace_back(batch); + UnrollBatchMatMul3D(input_lhs, input_rhs, batch_op, new_batch_prefix, + model, tail_it, &batch_pack_inputs); + } + } else { + // Recursion + for (int batch = 0; batch < current_dim_size; ++batch) { + std::vector new_batch_prefix = batch_prefix; + new_batch_prefix.emplace_back(batch); + std::vector pack_inputs = UnrollBatchMatMulRecursion( + input_lhs, input_rhs, batch_op, model, tail_it, new_batch_prefix); + + // The pack that will join all the individual matmul results together. 
+ auto* pack_op = new PackOperator; + std::string batch_name = absl::StrCat( + batch_op->outputs[0], "_b", absl::StrJoin(new_batch_prefix, "-")); + pack_op->inputs = pack_inputs; + pack_op->outputs = {AvailableArrayName(*model, batch_name + "/pack")}; + auto& pack_op_output = model->GetOrCreateArray(pack_op->outputs[0]); + pack_op_output.data_type = input_array_a.data_type; + pack_op->axis = 0; + pack_op->values_count = pack_inputs.size(); + *tail_it = model->operators.emplace(*tail_it, pack_op) + 1; + + batch_pack_inputs.push_back(pack_op->outputs[0]); + } + } + return batch_pack_inputs; +} + +std::vector GetTransposePerm(const Array& input_array) { + const int32 dims = input_array.shape().dimensions_count(); + std::vector perm_array_val(dims); + for (int i = 0; i < dims; ++i) { + perm_array_val[i] = i; + } + perm_array_val[dims - 2] = dims - 1; + perm_array_val[dims - 1] = dims - 2; + return perm_array_val; +} + +std::vector GetTransposeShape(const Shape& input_shape, + const std::vector& perm_array_val) { + const int32 dims = input_shape.dimensions_count(); + std::vector output_shape(dims); + for (int i = 0; i < dims; ++i) { + output_shape[i] = input_shape.dims(perm_array_val[i]); + } + return output_shape; +} + +TransposeOperator* TransposeInput(const string& input, Model* model) { + const auto& input_array = model->GetArray(input); + const auto perm_array = GetTransposePerm(input_array); + const string perm_array_name = CreateInt32Array( + model, AvailableArrayName(*model, input + "/transpose/perm"), perm_array); + auto* transpose_op = new TransposeOperator; + transpose_op->inputs = {input, perm_array_name}; + transpose_op->outputs = {AvailableArrayName(*model, input + "/transpose")}; + auto& transpose_array = model->GetOrCreateArray(transpose_op->outputs[0]); + *transpose_array.mutable_shape()->mutable_dims() = + GetTransposeShape(input_array.shape(), perm_array); + model->GetOrCreateArray(transpose_op->outputs[0]); + return transpose_op; +} + +} // namespace 
+ // Unrolls a BatchMatMul on the batch dimension. // We need to slice each batch out of the inputs, matmul them individually, then // stack them all back together at the end. @@ -46,115 +221,67 @@ namespace toco { const auto* batch_op = static_cast(batch_op_it->get()); - // We must have the shape of at least one input to know our batch size. - const auto& input_array_a = model->GetArray(batch_op->inputs[0]); - const auto& input_array_b = model->GetArray(batch_op->inputs[1]); - if (!input_array_a.has_shape() || !input_array_b.has_shape()) + auto& tail_it = batch_op_it; + + string input_lhs = batch_op->inputs[0]; + string input_rhs = batch_op->inputs[1]; + const auto& input_lhs_array = model->GetArray(input_lhs); + const auto& input_rhs_array = model->GetArray(input_rhs); + if (!input_lhs_array.has_shape() || !input_rhs_array.has_shape()) return ::tensorflow::Status::OK(); - // We only support the rank 3 case. If you are batching on rank > 3 you'll - // have to figure that out. - CHECK_EQ(input_array_a.shape().dimensions_count(), - input_array_b.shape().dimensions_count()) - << "Input dimensions must have the same rank"; - if (input_array_a.shape().dimensions_count() == 2) { + // Transpose LHS input if necessary. + if (batch_op->adj_x) { + TransposeOperator* transpose_op = TransposeInput(input_lhs, model); + tail_it = model->operators.emplace(tail_it, transpose_op) + 1; + input_lhs = transpose_op->outputs[0]; + } + const auto& input_array_a = model->GetArray(input_lhs); + + // Transpose RHS input if necessary. 
+ if (batch_op->adj_y) { + TransposeOperator* transpose_op = TransposeInput(input_rhs, model); + tail_it = model->operators.emplace(tail_it, transpose_op) + 1; + input_rhs = transpose_op->outputs[0]; + } + const auto& input_array_b = model->GetArray(input_rhs); + + const int dims = input_array_a.shape().dimensions_count(); + for (int i = 0; i < dims - 2; ++i) { + CHECK_EQ(input_array_a.shape().dims(i), input_array_b.shape().dims(i)) + << "input array not consistent at index " << i; + } + CHECK_EQ(input_array_a.shape().dims(dims - 1), + input_array_b.shape().dims(dims - 2)) + << "Input dimensions must be compatible for multipication. shape a = [" + << absl::StrJoin(input_array_a.shape().dims(), ", ") << "], shape b = [" + << absl::StrJoin(input_array_b.shape().dims(), ", ") << "]"; + + if (dims == 2) { // This is really just a MatMul. This likely means that someone hand-crafted // a graphdef with a BatchMatMul when they really wanted a MatMul. AddMessageF("Replacing non-batch BatchMatMul %s by a MatMul operator", LogName(*batch_op)); auto* matmul_op = new TensorFlowMatMulOperator; - matmul_op->inputs = batch_op->inputs; + matmul_op->inputs = {input_lhs, input_rhs}; matmul_op->outputs = batch_op->outputs; - const auto matmul_op_it = model->operators.emplace(batch_op_it, matmul_op); - batch_op_it = matmul_op_it + 1; - CHECK_EQ(batch_op_it->get(), batch_op); - model->operators.erase(batch_op_it); + tail_it = model->operators.emplace(tail_it, matmul_op) + 1; + CHECK_EQ(tail_it->get(), batch_op); + model->operators.erase(tail_it); *modified = true; return ::tensorflow::Status::OK(); } - CHECK_EQ(input_array_a.shape().dimensions_count(), 3) - << "Input arrays must have rank 3"; - // Perform the matmul for each slice of the batch. 
- int batch_count = input_array_a.shape().dims(0); - AddMessageF("Unrolling BatchMatMul %s %d times", LogName(*batch_op), - batch_count); - auto tail_it = batch_op_it; - std::vector pack_inputs; - for (int batch = 0; batch < batch_count; ++batch) { - std::string batch_name = - std::string(batch_op->outputs[0]) + "_b" + std::to_string(batch); - - // tf.slice(a, ...). - auto* slice_a_op = new SliceOperator; - slice_a_op->inputs = { - batch_op->inputs[0], - CreateInt32Array(model, batch_name + "/slice_a/slice/begin", - {batch, 0, 0}), - CreateInt32Array( - model, batch_name + "/slice_a/slice/size", - {1, input_array_a.shape().dims(1), input_array_a.shape().dims(2)}), - }; - slice_a_op->outputs = {AvailableArrayName(*model, batch_name + "/slice_a")}; - auto& slice_a_op_output = model->GetOrCreateArray(slice_a_op->outputs[0]); - slice_a_op_output.data_type = input_array_a.data_type; - tail_it = model->operators.emplace(tail_it, slice_a_op) + 1; - - // Reshape to remove the first dimension ([1,M,N] -> [M,N]). - auto* slice_a_reshape_op = new TensorFlowReshapeOperator; - slice_a_reshape_op->inputs = { - slice_a_op->outputs[0], - CreateInt32Array(model, batch_name + "/slice_a/reshape/shape", - {-1, input_array_a.shape().dims(2)})}; - slice_a_reshape_op->outputs = { - AvailableArrayName(*model, batch_name + "/slice_a/reshape")}; - auto& slice_a_reshape_op_output = - model->GetOrCreateArray(slice_a_reshape_op->outputs[0]); - slice_a_reshape_op_output.data_type = input_array_a.data_type; - tail_it = model->operators.emplace(tail_it, slice_a_reshape_op) + 1; - - // tf.slice(b, ...). 
- auto* slice_b_op = new SliceOperator; - slice_b_op->inputs = { - batch_op->inputs[1], - CreateInt32Array(model, batch_name + "/slice_b/slice/begin", - {batch, 0, 0}), - CreateInt32Array( - model, batch_name + "/slice_b/slice/size", - {1, input_array_b.shape().dims(1), input_array_b.shape().dims(2)}), - }; - slice_b_op->outputs = {AvailableArrayName(*model, batch_name + "/slice_b")}; - auto& slice_b_op_output = model->GetOrCreateArray(slice_b_op->outputs[0]); - slice_b_op_output.data_type = input_array_b.data_type; - tail_it = model->operators.emplace(tail_it, slice_b_op) + 1; - - // Reshape to remove the first dimension ([1,M,N] -> [M,N]). - auto* slice_b_reshape_op = new TensorFlowReshapeOperator; - slice_b_reshape_op->inputs = { - slice_b_op->outputs[0], - CreateInt32Array(model, batch_name + "/slice_b/reshape/shape", - {-1, input_array_b.shape().dims(2)})}; - slice_b_reshape_op->outputs = { - AvailableArrayName(*model, batch_name + "/slice_b/reshape")}; - auto& slice_b_reshape_op_output = - model->GetOrCreateArray(slice_b_reshape_op->outputs[0]); - slice_b_reshape_op_output.data_type = input_array_b.data_type; - tail_it = model->operators.emplace(tail_it, slice_b_reshape_op) + 1; - - // tf.matmul(slice_a, slice_b). - auto* matmul_op = new TensorFlowMatMulOperator; - matmul_op->inputs = {slice_a_reshape_op->outputs[0], - slice_b_reshape_op->outputs[0]}; - matmul_op->outputs = {AvailableArrayName(*model, batch_name)}; - auto& matmul_op_output = model->GetOrCreateArray(matmul_op->outputs[0]); - matmul_op_output.data_type = input_array_a.data_type; - tail_it = model->operators.emplace(tail_it, matmul_op) + 1; + CHECK_GE(input_array_a.shape().dimensions_count(), 3) + << "Input arrays must have rank >= 3"; - // Add to stack. 
- pack_inputs.push_back(matmul_op->outputs[0]); - } + const auto& dims_vec = input_array_a.shape().dims(); + AddMessageF("Unrolling BatchMatMul %s %d times", LogName(*batch_op), + std::accumulate(dims_vec.begin(), dims_vec.end() - 2, 1, + std::multiplies())); - // The pack that will join all the individual matmul results together. + std::vector pack_inputs = UnrollBatchMatMulRecursion( + input_lhs, input_rhs, batch_op, model, &tail_it, {}); auto* pack_op = new PackOperator; pack_op->inputs = pack_inputs; pack_op->outputs = {batch_op->outputs[0]}; diff --git a/tensorflow/lite/toco/import_tensorflow.cc b/tensorflow/lite/toco/import_tensorflow.cc index 9dba306e83..033c84b002 100644 --- a/tensorflow/lite/toco/import_tensorflow.cc +++ b/tensorflow/lite/toco/import_tensorflow.cc @@ -1092,11 +1092,14 @@ tensorflow::Status ConvertBatchMatMulOperator( Model* model) { TF_QCHECK_OK(CheckInputsCount(node, tf_import_flags, 2)); - // https://www.tensorflow.org/versions/r0.12/api_docs/python/math_ops/matrix_math_functions - CHECK(!HasAttr(node, "adj_a") || (GetBoolAttr(node, "adj_a") == false)); - CHECK(!HasAttr(node, "adj_b") || (GetBoolAttr(node, "adj_b") == false)); - auto* batch_matmul = new BatchMatMulOperator; + // https://www.tensorflow.org/versions/r0.12/api_docs/python/math_ops/matrix_math_functions + if (HasAttr(node, "adj_x")) { + batch_matmul->adj_x = GetBoolAttr(node, "adj_x"); + } + if (HasAttr(node, "adj_y")) { + batch_matmul->adj_y = GetBoolAttr(node, "adj_y"); + } batch_matmul->inputs = {node.input(0), node.input(1)}; batch_matmul->outputs = {node.name()}; diff --git a/tensorflow/lite/toco/model.h b/tensorflow/lite/toco/model.h index daf4605555..05cc3c0e7e 100644 --- a/tensorflow/lite/toco/model.h +++ b/tensorflow/lite/toco/model.h @@ -966,6 +966,8 @@ struct TensorFlowIdentityOperator : Operator { // TensorFlow equivalent: MatMul struct BatchMatMulOperator : Operator { BatchMatMulOperator() : Operator(OperatorType::kBatchMatMul) {} + bool adj_x = false; + bool 
adj_y = false; }; // General matrix multiplication operator. We don't want to support general -- GitLab From 03f8a1a2459a6becc1eaf6ab76f274fe2f8c6ab9 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Wed, 13 Feb 2019 18:03:02 -0800 Subject: [PATCH 102/351] Fix doc string: remove the internal references and simplify metric sub-classing example. PiperOrigin-RevId: 233868359 --- tensorflow/python/keras/metrics.py | 31 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index f24f8685d6..9ceb03395f 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -109,28 +109,25 @@ class Metric(Layer): Example subclass implementation: ``` - class BinaryTruePositives(Metric): - def __init__(self, name='binary_true_positives', dtype=None): - super(BinaryTruePositives, self).__init__(name=name, dtype=dtype) - self.true_positives = self.add_weight( - 'true_positives', initializer=init_ops.zeros_initializer) + class BinaryTruePositives(tf.keras.metrics.Metric): + + def __init__(self, name='binary_true_positives'): + super(BinaryTruePositives, self).__init__(name=name) + self.true_positives = self.add_weight(name='tp', initializer='zeros') def update_state(self, y_true, y_pred, sample_weight=None): - y_true = math_ops.cast(y_true, dtypes.bool) - y_pred = math_ops.cast(y_pred, dtypes.bool) - y_pred, y_true, sample_weight = squeeze_or_expand_dimensions( - y_pred, y_true, sample_weight) - - values = math_ops.logical_and( - math_ops.equal(y_true, True), math_ops.equal(y_pred, True)) - values = math_ops.cast(values, self._dtype) + y_true = tf.cast(y_true, tf.bool) + y_pred = tf.cast(y_pred, tf.bool) + + values = tf.logical_and(tf.equal(y_true, True), tf.equal(y_pred, True)) + values = tf.cast(values, self.dtype) if sample_weight is not None: - sample_weight = math_ops.cast(sample_weight, self._dtype) - values = math_ops.multiply(values, 
sample_weight) - self.true_positives.assign_add(math_ops.reduce_sum(values)) + sample_weight = tf.cast(sample_weight, self.dtype) + values = tf.multiply(values, sample_weight) + return self.true_positives.assign_add(tf.reduce_sum(values)) def result(self): - return array_ops.identity(self.true_positives) + return tf.identity(self.true_positives) ``` """ -- GitLab From dc4a055054e9d3e2ac36551dce1b02d4c2268839 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 13 Feb 2019 18:16:03 -0800 Subject: [PATCH 103/351] Internal change. PiperOrigin-RevId: 233870123 --- tensorflow/compat_template.__init__.py | 1 - tensorflow/tools/api/tests/BUILD | 5 +---- tensorflow/tools/api/tests/api_compatibility_test.py | 9 ++++----- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/tensorflow/compat_template.__init__.py b/tensorflow/compat_template.__init__.py index 05fd9cd981..a0c3b0f6fc 100644 --- a/tensorflow/compat_template.__init__.py +++ b/tensorflow/compat_template.__init__.py @@ -22,7 +22,6 @@ import os as _os import sys as _sys # pylint: disable=g-bad-import-order -from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import # API IMPORTS PLACEHOLDER diff --git a/tensorflow/tools/api/tests/BUILD b/tensorflow/tools/api/tests/BUILD index 4efa4a9651..8764409e4d 100644 --- a/tensorflow/tools/api/tests/BUILD +++ b/tensorflow/tools/api/tests/BUILD @@ -15,10 +15,7 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_binary") py_test( name = "api_compatibility_test", - srcs = [ - "api_compatibility_test.py", - "//tensorflow:tf_python_api_gen_v2", - ], + srcs = ["api_compatibility_test.py"], data = [ "//tensorflow/tools/api/golden:api_golden_v1", "//tensorflow/tools/api/golden:api_golden_v2", diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index a467a22f5d..0d3501a45d 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ 
-33,7 +33,6 @@ import re import sys import tensorflow as tf -from tensorflow._api.v2 import v2 as tf_v2 from google.protobuf import message from google.protobuf import text_format @@ -256,7 +255,7 @@ class ApiCompatibilityTest(test.TestCase): visitor.do_not_descend_map['tf'].append('contrib') if FLAGS.only_test_core_api: visitor.do_not_descend_map['tf'].extend(_NON_CORE_PACKAGES) - traverse.traverse(tf_v2.compat.v1, visitor) + traverse.traverse(tf.compat.v1, visitor) def testNoSubclassOfMessageV2(self): if not hasattr(tf.compat, 'v2'): @@ -316,7 +315,7 @@ class ApiCompatibilityTest(test.TestCase): @test_util.run_v1_only('b/120545219') def testAPIBackwardsCompatibility(self): - api_version = 1 + api_version = 2 if '_api.v2' in tf.__name__ else 1 golden_file_pattern = os.path.join( resource_loader.get_root_dir_with_all_resources(), _KeyToFilePath('*', api_version)) @@ -339,7 +338,7 @@ class ApiCompatibilityTest(test.TestCase): golden_file_pattern = os.path.join( resource_loader.get_root_dir_with_all_resources(), _KeyToFilePath('*', api_version)) - self._checkBackwardsCompatibility(tf_v2.compat.v1, golden_file_pattern, + self._checkBackwardsCompatibility(tf.compat.v1, golden_file_pattern, api_version) def testAPIBackwardsCompatibilityV2(self): @@ -348,7 +347,7 @@ class ApiCompatibilityTest(test.TestCase): resource_loader.get_root_dir_with_all_resources(), _KeyToFilePath('*', api_version)) self._checkBackwardsCompatibility( - tf_v2, + tf.compat.v2, golden_file_pattern, api_version, additional_private_map={'tf.compat': ['v1', 'v2']}) -- GitLab From 23dd71c502a197d98f3a2144f9213208481b8ad7 Mon Sep 17 00:00:00 2001 From: Jian Li Date: Wed, 13 Feb 2019 18:17:27 -0800 Subject: [PATCH 104/351] Create int8 pad and pad2. 
PiperOrigin-RevId: 233870300 --- tensorflow/lite/kernels/pad.cc | 25 +++ tensorflow/lite/kernels/pad_test.cc | 198 +++++++++++-------- tensorflow/lite/kernels/register.cc | 6 +- tensorflow/lite/toco/tflite/operator.cc | 12 ++ tensorflow/lite/toco/tflite/operator_test.cc | 6 + 5 files changed, 163 insertions(+), 84 deletions(-) diff --git a/tensorflow/lite/kernels/pad.cc b/tensorflow/lite/kernels/pad.cc index 8e6ed6e741..b60b3dd9c8 100644 --- a/tensorflow/lite/kernels/pad.cc +++ b/tensorflow/lite/kernels/pad.cc @@ -214,6 +214,31 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { } } } break; + case kTfLiteInt8: { + int8_t pad_value; + if (op_context.constant_values == nullptr) { + // Quantized Pad requires that 0 is represented in the quantized + // range. + TF_LITE_ENSURE(context, op_context.output->params.zero_point >= + std::numeric_limits::min()); + TF_LITE_ENSURE(context, op_context.output->params.zero_point <= + std::numeric_limits::max()); + pad_value = static_cast(op_context.output->params.zero_point); + } else { + // Quantized Pad requires that 'constant_values' is represented in the + // same quantized range as the input and output tensors. 
+ TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point, + op_context.constant_values->params.zero_point); + TF_LITE_ENSURE_EQ(context, op_context.output->params.scale, + op_context.constant_values->params.scale); + pad_value = *GetTensorData(op_context.constant_values); + } + if (op_context.resizing_category == ResizingCategory::kImageStyle) { + TF_LITE_PAD(reference_ops, PadImageStyle, int8_t, pad_value); + } else { + TF_LITE_PAD(reference_ops, Pad, int8_t, pad_value); + } + } break; case kTfLiteInt32: { int32_t pad_value = op_context.constant_values == nullptr diff --git a/tensorflow/lite/kernels/pad_test.cc b/tensorflow/lite/kernels/pad_test.cc index 3caa4065dc..97f95264f1 100644 --- a/tensorflow/lite/kernels/pad_test.cc +++ b/tensorflow/lite/kernels/pad_test.cc @@ -24,31 +24,34 @@ namespace { using ::testing::ElementsAreArray; using ::testing::Matcher; -template +template class PadOpModel : public SingleOpModel { public: - void SetInput(std::initializer_list data) { - PopulateTensor(input_, data); + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); } void SetQuantizedInput(std::initializer_list data) { - QuantizeAndPopulate(input_, data); + QuantizeAndPopulate(input_, data); } void SetQuantizedPadValue(float data) { - QuantizeAndPopulate(constant_values_, {data}); + QuantizeAndPopulate(constant_values_, {data}); } void SetPaddings(std::initializer_list paddings) { PopulateTensor(paddings_, paddings); } - std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutput() { + return ExtractVector(output_); + } std::vector GetOutputShape() { return GetTensorShape(output_); } std::vector GetDequantizedOutput() { - return Dequantize(ExtractVector(output_), - GetScale(output_), GetZeroPoint(output_)); + return Dequantize( + ExtractVector(output_), GetScale(output_), + GetZeroPoint(output_)); } protected: @@ -59,18 +62,18 @@ class PadOpModel : public SingleOpModel { }; // Tests case where paddings is a const 
tensor. Type T is the dtype. -template -class PadV2OpConstModel : public PadOpModel { +template +class PadV2OpConstModel : public PadOpModel { public: PadV2OpConstModel(const TensorData& input, std::initializer_list paddings_shape, - std::initializer_list paddings, T constant_values, + std::initializer_list paddings, T1 constant_values, const TensorData& output) { this->input_ = this->AddInput(input); this->paddings_ = this->AddConstInput(TensorType_INT32, paddings, paddings_shape); this->constant_values_ = - this->AddConstInput(GetTensorType(), {constant_values}, {1}); + this->AddConstInput(GetTensorType(), {constant_values}, {1}); this->output_ = this->AddOutput(output); @@ -103,7 +106,7 @@ class PadV2OpConstModel : public PadOpModel { // PadOpDynamicModel m(input_shape, paddings_shape, paddings_data); // m.SetInput(input_data); // m.Invoke(); -class PadOpConstModel : public PadOpModel { +class PadOpConstModel : public PadOpModel { public: PadOpConstModel(const TensorData& input, std::initializer_list paddings_shape, @@ -121,16 +124,18 @@ class PadOpConstModel : public PadOpModel { }; // Test case where paddings is a non-const tensor. 
-template -class PadV2OpDynamicModel : public PadOpModel { +template +class PadV2OpDynamicModel + : public PadOpModel { public: PadV2OpDynamicModel(const TensorData& input, std::initializer_list paddings_shape, - T constant_values, const TensorData& output) { + RegularInputOuput constant_values, + const TensorData& output) { this->input_ = this->AddInput(input); this->paddings_ = this->AddInput(TensorType_INT32); - this->constant_values_ = - this->AddConstInput(GetTensorType(), {constant_values}, {1}); + this->constant_values_ = this->AddConstInput( + GetTensorType(), {constant_values}, {1}); this->output_ = this->AddOutput(output); this->SetBuiltinOp(BuiltinOperator_PADV2, BuiltinOptions_PadV2Options, @@ -159,7 +164,7 @@ class PadV2OpDynamicModel : public PadOpModel { // m.SetInput(input_data); // m.SetPaddings(paddings_data); // m.Invoke(); -class PadOpDynamicModel : public PadOpModel { +class PadOpDynamicModel : public PadOpModel { public: PadOpDynamicModel(const TensorData& input, std::initializer_list paddings_shape, @@ -377,34 +382,47 @@ TEST_F(QuantizedPadOpTest, AdvancedDynamicTest) { #ifdef GTEST_HAS_DEATH_TEST TEST(PadV2OpTest, TooManyDimensions) { - EXPECT_DEATH(PadV2OpConstModel( - {TensorType_FLOAT32, {1, 2, 3, 4, 5, 6, 7, 8, 9}}, {9, 2}, - {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}, 0.0, - {TensorType_FLOAT32}), + typedef PadV2OpConstModel f; + EXPECT_DEATH(f({TensorType_FLOAT32, {1, 2, 3, 4, 5, 6, 7, 8, 9}}, {9, 2}, + {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}, 0.0, + {TensorType_FLOAT32}), "dims <= 4"); } TEST(PadV2OpTest, UnequalDimensions) { - EXPECT_DEATH( - PadV2OpConstModel({TensorType_FLOAT32, {1, 1, 2, 1}}, {3, 2}, - {1, 1, 2, 2, 3, 3}, 0.0, {TensorType_FLOAT32}), - "3 != 4"); + typedef PadV2OpConstModel f; + EXPECT_DEATH(f({TensorType_FLOAT32, {1, 1, 2, 1}}, {3, 2}, {1, 1, 2, 2, 3, 3}, + 0.0, {TensorType_FLOAT32}), + "3 != 4"); } TEST(PadV2OpTest, InvalidPadValue) { - 
EXPECT_DEATH(PadV2OpConstModel({TensorType_FLOAT32, {1, 1, 2, 1}}, - {4, 2}, {0, 0, 1, -1, 2, -1, 0, 0}, 0.0, - {TensorType_FLOAT32}), + typedef PadV2OpConstModel f; + EXPECT_DEATH(f({TensorType_FLOAT32, {1, 1, 2, 1}}, {4, 2}, + {0, 0, 1, -1, 2, -1, 0, 0}, 0.0, {TensorType_FLOAT32}), "Pad value has to be greater than equal to 0."); } #endif -TEST(PadV2OpTest, SimpleConstTest) { +TEST(PadV2OpTest, SimpleConstTestUint8) { + // Padding is represented as four 2-D lists representing above padding and + // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). + PadV2OpConstModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, 0.0, + {TensorType_FLOAT32}); + m.SetInput({1, 2, 3, 4}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, + 0, 0, 0, 0, 0})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1})); +} + +TEST(PadV2OpTest, SimpleConstTestInt8) { // Padding is represented as four 2-D lists representing above padding and // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). - PadV2OpConstModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, - {0, 0, 1, 1, 1, 1, 0, 0}, 0.0, - {TensorType_FLOAT32}); + PadV2OpConstModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, + {0, 0, 1, 1, 1, 1, 0, 0}, 0.0, + {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4}); m.Invoke(); EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4, @@ -412,11 +430,25 @@ TEST(PadV2OpTest, SimpleConstTest) { EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1})); } -TEST(PadV2OpTest, SimpleConstFloat32ValuedTest) { +TEST(PadV2OpTest, SimpleConstFloat32ValuedTestUint8) { // Padding is represented as four 2-D lists representing above padding and // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). 
- PadV2OpConstModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, - {0, 0, 1, 1, 1, 1, 0, 0}, 5, {TensorType_FLOAT32}); + PadV2OpConstModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, 5, + {TensorType_FLOAT32}); + m.SetInput({1, 2, 3, 4}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 1, 2, 5, 5, 3, 4, + 5, 5, 5, 5, 5})); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1})); +} + +TEST(PadV2OpTest, SimpleConstFloat32ValuedTestInt8) { + // Padding is represented as four 2-D lists representing above padding and + // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). + PadV2OpConstModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, + {0, 0, 1, 1, 1, 1, 0, 0}, 5, + {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4}); m.Invoke(); EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 1, 2, 5, 5, 3, 4, @@ -427,8 +459,9 @@ TEST(PadV2OpTest, SimpleConstFloat32ValuedTest) { TEST(PadV2OpTest, Simple4DConstFloat32ValuedTest) { // Padding is represented as four 2-D lists representing above padding and // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). - PadV2OpConstModel m({TensorType_FLOAT32, {1, 1, 2, 1}}, {4, 2}, - {0, 1, 0, 0, 0, 0, 0, 1}, 5, {TensorType_FLOAT32}); + PadV2OpConstModel m({TensorType_FLOAT32, {1, 1, 2, 1}}, + {4, 2}, {0, 1, 0, 0, 0, 0, 0, 1}, 5, + {TensorType_FLOAT32}); m.SetInput({3, 3}); m.Invoke(); EXPECT_THAT(m.GetOutput(), ElementsAreArray({3, 5, 3, 5, 5, 5, 5, 5})); @@ -438,8 +471,9 @@ TEST(PadV2OpTest, Simple4DConstFloat32ValuedTest) { TEST(PadV2OpTest, SimpleConstInt32ValuedTest) { // Padding is represented as four 2-D lists representing above padding and // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). 
- PadV2OpConstModel m({TensorType_INT32, {1, 2, 2, 1}}, {4, 2}, - {0, 0, 1, 1, 1, 1, 0, 0}, 5, {TensorType_INT32}); + PadV2OpConstModel m({TensorType_INT32, {1, 2, 2, 1}}, + {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, 5, + {TensorType_INT32}); m.SetInput({1, 2, 3, 4}); m.Invoke(); EXPECT_THAT(m.GetOutput(), ElementsAreArray({5, 5, 5, 5, 5, 1, 2, 5, 5, 3, 4, @@ -448,8 +482,8 @@ TEST(PadV2OpTest, SimpleConstInt32ValuedTest) { } TEST(PadV2OpTest, SimpleDynamicTest) { - PadV2OpDynamicModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, 0.0, - {TensorType_FLOAT32}); + PadV2OpDynamicModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {4, 2}, 0.0, {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4}); m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0}); m.Invoke(); @@ -459,8 +493,8 @@ TEST(PadV2OpTest, SimpleDynamicTest) { } TEST(PadV2OpTest, SimpleDynamicValuedTest) { - PadV2OpDynamicModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2}, 5, - {TensorType_FLOAT32}); + PadV2OpDynamicModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, + {4, 2}, 5, {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4}); m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0}); m.Invoke(); @@ -470,8 +504,9 @@ TEST(PadV2OpTest, SimpleDynamicValuedTest) { } TEST(PadV2OpTest, AdvancedConstTest) { - PadV2OpConstModel m({TensorType_FLOAT32, {1, 2, 3, 1}}, {4, 2}, - {0, 0, 0, 2, 1, 3, 0, 0}, 0, {TensorType_FLOAT32}); + PadV2OpConstModel m({TensorType_FLOAT32, {1, 2, 3, 1}}, + {4, 2}, {0, 0, 0, 2, 1, 3, 0, 0}, 0, + {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4, 5, 6}); m.Invoke(); EXPECT_THAT(m.GetOutput(), @@ -481,8 +516,8 @@ TEST(PadV2OpTest, AdvancedConstTest) { } TEST(PadV2OpTest, AdvancedDynamicTest) { - PadV2OpDynamicModel m({TensorType_FLOAT32, {1, 2, 3, 1}}, {4, 2}, 0, - {TensorType_FLOAT32}); + PadV2OpDynamicModel m({TensorType_FLOAT32, {1, 2, 3, 1}}, + {4, 2}, 0, {TensorType_FLOAT32}); m.SetInput({1, 2, 3, 4, 5, 6}); m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0}); m.Invoke(); @@ -505,21 +540,20 @@ class QuantizedPadV2OpTest : public ::testing::Test { 
TEST_F(QuantizedPadV2OpTest, ZeroNotInQuantizationRange) { // The test_util and actual quantization code currently ensure that the range // must include zero, but if that ever changes, this test will catch it. - EXPECT_DEATH( - PadV2OpConstModel m({TensorType_UINT8, {1, 2, 2, 1}, 1.0, 2.0}, - {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, 0, - {TensorType_UINT8, {}, 1.0, 2.0}), - ".*Check failed: f_min <= 0.*"); + typedef PadV2OpConstModel f; + EXPECT_DEATH(f({TensorType_UINT8, {1, 2, 2, 1}, 1.0, 2.0}, {4, 2}, + {0, 0, 1, 1, 1, 1, 0, 0}, 0, {TensorType_UINT8, {}, 1.0, 2.0}), + ".*Check failed: f_min <= 0.*"); } #endif TEST_F(QuantizedPadV2OpTest, SimpleConstTest) { // Padding is represented as four 2-D lists representing above padding and // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). - PadV2OpConstModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, - {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, - {TensorType_UINT8, {1}, -1.0, 1.0}, - {TensorType_UINT8, {}, -1.0, 1.0}); + PadV2OpConstModel m( + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2}, + {0, 0, 1, 1, 1, 1, 0, 0}, {TensorType_UINT8, {1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}); m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7}); m.SetQuantizedPadValue(0); m.Invoke(); @@ -531,9 +565,9 @@ TEST_F(QuantizedPadV2OpTest, SimpleConstTest) { } TEST_F(QuantizedPadV2OpTest, SimpleDynamicTest) { - PadV2OpDynamicModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, - {4, 2}, {TensorType_UINT8, {1}, -1.0, 1.0}, - {TensorType_UINT8, {}, -1.0, 1.0}); + PadV2OpDynamicModel m( + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2}, + {TensorType_UINT8, {1}, -1.0, 1.0}, {TensorType_UINT8, {}, -1.0, 1.0}); m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7}); m.SetQuantizedPadValue(0); m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0}); @@ -546,10 +580,10 @@ TEST_F(QuantizedPadV2OpTest, SimpleDynamicTest) { } TEST_F(QuantizedPadV2OpTest, AdvancedConstTest) { - PadV2OpConstModel m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, - {4, 2}, {0, 0, 0, 2, 1, 3, 0, 
0}, - {TensorType_UINT8, {1}, -1.0, 1.0}, - {TensorType_UINT8, {}, -1.0, 1.0}); + PadV2OpConstModel m( + {TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2}, + {0, 0, 0, 2, 1, 3, 0, 0}, {TensorType_UINT8, {1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}); m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}); m.SetQuantizedPadValue(0); m.Invoke(); @@ -562,9 +596,9 @@ TEST_F(QuantizedPadV2OpTest, AdvancedConstTest) { } TEST_F(QuantizedPadV2OpTest, AdvancedDynamicTest) { - PadV2OpDynamicModel m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, - {4, 2}, {TensorType_UINT8, {1}, -1.0, 1.0}, - {TensorType_UINT8, {}, -1.0, 1.0}); + PadV2OpDynamicModel m( + {TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2}, + {TensorType_UINT8, {1}, -1.0, 1.0}, {TensorType_UINT8, {}, -1.0, 1.0}); m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}); m.SetQuantizedPadValue(0); m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0}); @@ -580,10 +614,10 @@ TEST_F(QuantizedPadV2OpTest, AdvancedDynamicTest) { TEST_F(QuantizedPadV2OpTest, SimpleConstValuedTest) { // Padding is represented as four 2-D lists representing above padding and // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}). 
- PadV2OpConstModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, - {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0}, - {TensorType_UINT8, {1}, -1.0, 1.0}, - {TensorType_UINT8, {}, -1.0, 1.0}); + PadV2OpConstModel m( + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2}, + {0, 0, 1, 1, 1, 1, 0, 0}, {TensorType_UINT8, {1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}); m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7}); m.SetQuantizedPadValue(-0.5); m.Invoke(); @@ -596,9 +630,9 @@ TEST_F(QuantizedPadV2OpTest, SimpleConstValuedTest) { } TEST_F(QuantizedPadV2OpTest, SimpleDynamicValuedTest) { - PadV2OpDynamicModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, - {4, 2}, {TensorType_UINT8, {1}, -1.0, 1.0}, - {TensorType_UINT8, {}, -1.0, 1.0}); + PadV2OpDynamicModel m( + {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2}, + {TensorType_UINT8, {1}, -1.0, 1.0}, {TensorType_UINT8, {}, -1.0, 1.0}); m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7}); m.SetQuantizedPadValue(-0.5); m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0}); @@ -612,10 +646,10 @@ TEST_F(QuantizedPadV2OpTest, SimpleDynamicValuedTest) { } TEST_F(QuantizedPadV2OpTest, AdvancedConstValuedTest) { - PadV2OpConstModel m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, - {4, 2}, {0, 0, 0, 2, 1, 3, 0, 0}, - {TensorType_UINT8, {1}, -1.0, 1.0}, - {TensorType_UINT8, {}, -1.0, 1.0}); + PadV2OpConstModel m( + {TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2}, + {0, 0, 0, 2, 1, 3, 0, 0}, {TensorType_UINT8, {1}, -1.0, 1.0}, + {TensorType_UINT8, {}, -1.0, 1.0}); m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}); m.SetQuantizedPadValue(-0.5); m.Invoke(); @@ -629,9 +663,9 @@ TEST_F(QuantizedPadV2OpTest, AdvancedConstValuedTest) { } TEST_F(QuantizedPadV2OpTest, AdvancedDynamicValuedTest) { - PadV2OpDynamicModel m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, - {4, 2}, {TensorType_UINT8, {1}, -1.0, 1.0}, - {TensorType_UINT8, {}, -1.0, 1.0}); + PadV2OpDynamicModel m( + {TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2}, + {TensorType_UINT8, {1}, -1.0, 1.0}, 
{TensorType_UINT8, {}, -1.0, 1.0}); m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3}); m.SetQuantizedPadValue(-0.5); m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0}); diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index 0a0ed49f88..d445129d0c 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -234,8 +234,10 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, Register_UNIDIRECTIONAL_SEQUENCE_LSTM(), /* min_version */ 1, /* max_version */ 2); - AddBuiltin(BuiltinOperator_PAD, Register_PAD()); - AddBuiltin(BuiltinOperator_PADV2, Register_PADV2()); + AddBuiltin(BuiltinOperator_PAD, Register_PAD(), /* min_version */ 1, + /* max_version */ 2); + AddBuiltin(BuiltinOperator_PADV2, Register_PADV2(), /* min_version */ 1, + /* max_version */ 2); AddBuiltin(BuiltinOperator_RESHAPE, Register_RESHAPE()); AddBuiltin(BuiltinOperator_RESIZE_BILINEAR, Register_RESIZE_BILINEAR(), /* min_version */ 1, diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index 8192123cfc..f22f7ff10b 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -764,6 +764,12 @@ class Pad : public BuiltinOperatorinputs[0]; + const Array& input_array = op_signature.model->GetArray(input_name); + // If the op take int8 input, it is version 2. + if (input_array.data_type == ArrayDataType::kInt8) { + return 2; + } return 1; } }; @@ -801,6 +807,12 @@ class PadV2 : public BuiltinOperatorinputs[0]; + const Array& input_array = op_signature.model->GetArray(input_name); + // If the op take int8 input, it is version 2. 
+ if (input_array.data_type == ArrayDataType::kInt8) { + return 2; + } return 1; } }; diff --git a/tensorflow/lite/toco/tflite/operator_test.cc b/tensorflow/lite/toco/tflite/operator_test.cc index c8124f11c9..62d3997a91 100644 --- a/tensorflow/lite/toco/tflite/operator_test.cc +++ b/tensorflow/lite/toco/tflite/operator_test.cc @@ -836,6 +836,12 @@ TEST_F(OperatorTest, VersioningAddTest) { SimpleVersioningTest(); } TEST_F(OperatorTest, VersioningSubTest) { SimpleVersioningTest(); } +TEST_F(OperatorTest, VersioningPadTest) { SimpleVersioningTest(); } + +TEST_F(OperatorTest, VersioningPadV2Test) { + SimpleVersioningTest(); +} + TEST_F(OperatorTest, VersioningSelectTest) { SelectOperator select_op; select_op.inputs = {"input1"}; -- GitLab From 87f0fac2a1259ad742419bdf2b893a5aacd4fcbf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 18:18:15 -0800 Subject: [PATCH 105/351] Add more functionality to TensorProtoHelper (in preparation for adding tensor compression utility.) Optimize CreateTensorProto by using the new helpers. PiperOrigin-RevId: 233870395 --- tensorflow/core/framework/tensor_util.h | 124 +++++++++++------------- 1 file changed, 58 insertions(+), 66 deletions(-) diff --git a/tensorflow/core/framework/tensor_util.h b/tensorflow/core/framework/tensor_util.h index a7cf600bab..82a05e4c8d 100644 --- a/tensorflow/core/framework/tensor_util.h +++ b/tensorflow/core/framework/tensor_util.h @@ -16,11 +16,14 @@ limitations under the License. 
#ifndef TENSORFLOW_CORE_FRAMEWORK_TENSOR_UTIL_H_ #define TENSORFLOW_CORE_FRAMEWORK_TENSOR_UTIL_H_ +#include +#include #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.pb.h" #include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/platform/protobuf.h" +#include "tensorflow/core/platform/types.h" -#include namespace tensorflow { namespace tensor { @@ -61,83 +64,74 @@ void SetTensorProtoShape(std::vector shape, TensorShapeProto* shape_proto); // Defines value type dependent methods to manipulate `TensorProto`. -// Class specializations has to define following methods: +// Class specializations have to define following methods: // static DataType GetDataType() // static void AddValue(Type value, TensorProto* proto) +// template +// static void AddValues(IterType begin, IterType end, TensorProto* proto) + template class TensorProtoHelper : public std::false_type {}; -template <> -class TensorProtoHelper : public std::true_type { - public: - static DataType GetDataType() { return DataType::DT_STRING; } - static void AddValue(const string& value, TensorProto* proto) { - *proto->mutable_string_val()->Add() = value; +#define DEFINE_PROTO_HELPER(TYPE, TF_TYPE, FIELDTYPE) \ + template <> \ + class TensorProtoHelper : public std::true_type { \ + public: \ + static DataType GetDataType() { return DataType::TF_TYPE; } \ + static void AddValue(const TYPE& value, TensorProto* proto) { \ + proto->mutable_##FIELDTYPE##_val()->Add(value); \ + } \ + template \ + static void AddValues(IterType begin, IterType end, TensorProto* proto) { \ + using SrcType = typename std::iterator_traits::value_type; \ + size_t n = std::distance(begin, end); \ + FIELDTYPE* dst_ptr = AppendUninitialized(n, proto); \ + if (std::is_same::value) { \ + std::copy(begin, end, dst_ptr); \ + } else { \ + std::transform(begin, end, dst_ptr, [](SrcType x) -> FIELDTYPE { \ + return static_cast(x); \ + }); \ + } \ + } \ + \ + private: \ + static 
FIELDTYPE* AppendUninitialized(size_t n, TensorProto* proto) { \ + auto* field = proto->mutable_##FIELDTYPE##_val(); \ + field->Reserve(field->size() + n); \ + return reinterpret_cast(field->AddNAlreadyReserved(n)); \ + } \ } -}; -template <> -class TensorProtoHelper : public std::true_type { - public: - static DataType GetDataType() { return DataType::DT_INT32; } - static void AddValue(int32 value, TensorProto* proto) { - proto->mutable_int_val()->Add(value); - } -}; +DEFINE_PROTO_HELPER(float, DT_FLOAT, float); +DEFINE_PROTO_HELPER(double, DT_DOUBLE, double); +DEFINE_PROTO_HELPER(int8, DT_INT8, int); +DEFINE_PROTO_HELPER(uint8, DT_UINT8, int); +DEFINE_PROTO_HELPER(int16, DT_INT16, int); +DEFINE_PROTO_HELPER(uint16, DT_UINT16, int); +DEFINE_PROTO_HELPER(int32, DT_INT32, int); +DEFINE_PROTO_HELPER(uint32, DT_UINT32, uint32); +DEFINE_PROTO_HELPER(int64, DT_INT64, int64); +DEFINE_PROTO_HELPER(uint64, DT_UINT64, uint64); +DEFINE_PROTO_HELPER(bool, DT_BOOL, bool); -template <> -class TensorProtoHelper : public std::true_type { - public: - static DataType GetDataType() { return DataType::DT_INT64; } - static void AddValue(int64 value, TensorProto* proto) { - proto->mutable_int64_val()->Add(value); - } -}; +#undef DEFINE_PROTO_HELPER template <> -class TensorProtoHelper : public std::true_type { - public: - static DataType GetDataType() { return DataType::DT_UINT32; } - static void AddValue(uint32 value, TensorProto* proto) { - proto->mutable_uint32_val()->Add(value); - } -}; - -template <> -class TensorProtoHelper : public std::true_type { - public: - static DataType GetDataType() { return DataType::DT_UINT64; } - static void AddValue(uint64 value, TensorProto* proto) { - proto->mutable_uint64_val()->Add(value); - } -}; - -template <> -class TensorProtoHelper : public std::true_type { +class TensorProtoHelper : public std::true_type { public: - static DataType GetDataType() { return DataType::DT_FLOAT; } - static void AddValue(float value, TensorProto* proto) { - 
proto->mutable_float_val()->Add(value); + static DataType GetDataType() { return DataType::DT_STRING; } + static void AddValue(const string& value, TensorProto* proto) { + *proto->mutable_string_val()->Add() = value; } -}; - -template <> -class TensorProtoHelper : public std::true_type { - public: - static DataType GetDataType() { return DataType::DT_DOUBLE; } - static void AddValue(double value, TensorProto* proto) { - proto->mutable_double_val()->Add(value); + template + static void AddValues(IterType begin, IterType end, TensorProto* proto) { + for (IterType it = begin; it != end; ++it) { + AddValue(*it, proto); + } } }; -template <> -class TensorProtoHelper : public std::true_type { - public: - static DataType GetDataType() { return DataType::DT_BOOL; } - static void AddValue(bool value, TensorProto* proto) { - proto->mutable_bool_val()->Add(value); - } -}; } // namespace internal // Creates a 'TensorProto' with specified shape and values. @@ -152,9 +146,7 @@ CreateTensorProto(const std::vector& values, using TypeHelper = internal::TensorProtoHelper; tensor.set_dtype(TypeHelper::GetDataType()); internal::SetTensorProtoShape(shape, tensor.mutable_tensor_shape()); - for (const auto& value : values) { - TypeHelper::AddValue(value, &tensor); - } + TypeHelper::AddValues(values.begin(), values.end(), &tensor); return tensor; } -- GitLab From f248bbf20a30182e1bccb761638565326c97ef8a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 18:20:08 -0800 Subject: [PATCH 106/351] Update ops-related pbtxt files. 
PiperOrigin-RevId: 233870616 --- .../core/ops/compat/ops_history.v1.pbtxt | 3389 ++++++++-- tensorflow/core/ops/ops.pbtxt | 5541 ++++++++++++----- 2 files changed, 6604 insertions(+), 2326 deletions(-) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 6f0a812992..054d96e2f1 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -1546,6 +1546,43 @@ op { } is_stateful: true } +op { + name: "AllToAll" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "group_assignment" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_FLOAT + } + } + } + attr { + name: "concat_dimension" + type: "int" + } + attr { + name: "split_dimension" + type: "int" + } + attr { + name: "split_count" + type: "int" + } +} op { name: "Angle" input_arg { @@ -12910,6 +12947,46 @@ op { } is_stateful: true } +op { + name: "CollectivePermute" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "source_target_pairs" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "CollectiveReduce" input_arg { @@ -13656,6 +13733,35 @@ op { } is_stateful: true } +op { + name: "ConfigureDistributedTPU" + output_arg { + name: "topology" + type: DT_STRING + } + attr { + name: "embedding_config" + type: "string" + default_value { + s: "" + } + } + attr { + name: "tpu_embedding_config" + type: "string" + default_value { + s: "" 
+ } + } + attr { + name: "is_global_init" + type: "bool" + default_value { + b: false + } + } + is_stateful: true +} op { name: "Conj" input_arg { @@ -16412,6 +16518,31 @@ op { } } } +op { + name: "CrossReplicaSum" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "group_assignment" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_FLOAT + } + } + } +} op { name: "CudnnRNN" input_arg { @@ -21970,6 +22101,124 @@ op { type: DT_STRING } } +op { + name: "EnqueueTPUEmbeddingIntegerBatch" + input_arg { + name: "batch" + type: DT_INT32 + number_attr: "N" + } + input_arg { + name: "mode_override" + type: DT_STRING + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "EnqueueTPUEmbeddingSparseBatch" + input_arg { + name: "sample_indices" + type: DT_INT32 + number_attr: "N" + } + input_arg { + name: "embedding_indices" + type: DT_INT32 + number_attr: "N" + } + input_arg { + name: "aggregation_weights" + type: DT_FLOAT + number_attr: "N" + } + input_arg { + name: "mode_override" + type: DT_STRING + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + attr { + name: "combiners" + type: "list(string)" + default_value { + list { + } + } + } + is_stateful: true +} +op { + name: "EnqueueTPUEmbeddingSparseTensorBatch" + input_arg { + name: "sample_indices" + type: DT_INT32 + number_attr: "N" + } + input_arg { + name: "embedding_indices" + type: DT_INT32 + number_attr: "N" + } + input_arg { + name: "aggregation_weights" + type: DT_FLOAT + number_attr: "N" + } + input_arg { + name: "mode_override" + type: DT_STRING + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + 
name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + attr { + name: "combiners" + type: "list(string)" + default_value { + list { + } + } + } + attr { + name: "table_ids" + type: "list(int)" + } + is_stateful: true +} op { name: "EnsureShape" input_arg { @@ -29523,6 +29772,108 @@ op { } } } +op { + name: "InfeedDequeue" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + is_stateful: true +} +op { + name: "InfeedDequeueTuple" + output_arg { + name: "outputs" + type_list_attr: "dtypes" + } + attr { + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "shapes" + type: "list(shape)" + } + is_stateful: true +} +op { + name: "InfeedEnqueue" + input_arg { + name: "input" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + default_value { + shape { + } + } + } + attr { + name: "layout" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "InfeedEnqueueTuple" + input_arg { + name: "inputs" + type_list_attr: "dtypes" + } + attr { + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "shapes" + type: "list(shape)" + } + attr { + name: "layouts" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} op { name: "InitializeTable" input_arg { @@ -31868,1072 +32219,1469 @@ op { is_stateful: true } op { - name: "Log" + name: "LoadTPUEmbeddingADAMParameters" input_arg { - name: "x" - type_attr: "T" - } - output_arg { - name: "y" - type_attr: "T" - } - attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 
- } - } + name: "parameters" + type: DT_FLOAT } -} -op { - name: "Log" input_arg { - name: "x" - type_attr: "T" + name: "momenta" + type: DT_FLOAT } - output_arg { - name: "y" - type_attr: "T" + input_arg { + name: "velocities" + type: DT_FLOAT } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_BFLOAT16 - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } + name: "table_id" + type: "int" + default_value { + i: -1 } + has_minimum: true + minimum: -1 } -} -op { - name: "Log" - input_arg { - name: "x" - type_attr: "T" + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } } - output_arg { - name: "y" - type_attr: "T" + attr { + name: "num_shards" + type: "int" } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_BFLOAT16 - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "Log1p" + name: "LoadTPUEmbeddingADAMParametersGradAccumDebug" input_arg { - name: "x" - type_attr: "T" - } - output_arg { - name: "y" - type_attr: "T" + name: "parameters" + type: DT_FLOAT } - attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } + input_arg { + name: "momenta" + type: DT_FLOAT } -} -op { - name: "Log1p" input_arg { - name: "x" - type_attr: "T" + name: "velocities" + type: DT_FLOAT } - output_arg { - name: "y" - type_attr: "T" + input_arg { + name: "gradient_accumulators" + type: DT_FLOAT } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_BFLOAT16 - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } + name: "table_id" + type: "int" + default_value { + i: -1 } + has_minimum: true + minimum: -1 } -} -op { - name: "Log1p" - input_arg { - name: "x" - type_attr: "T" + attr { + name: 
"table_name" + type: "string" + default_value { + s: "" + } } - output_arg { - name: "y" - type_attr: "T" + attr { + name: "num_shards" + type: "int" } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_BFLOAT16 - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "LogMatrixDeterminant" + name: "LoadTPUEmbeddingAdadeltaParameters" input_arg { - name: "input" - type_attr: "T" + name: "parameters" + type: DT_FLOAT } - output_arg { - name: "sign" - type_attr: "T" + input_arg { + name: "accumulators" + type: DT_FLOAT } - output_arg { - name: "log_abs_determinant" - type_attr: "T" + input_arg { + name: "updates" + type: DT_FLOAT } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } + name: "table_id" + type: "int" + default_value { + i: -1 } + has_minimum: true + minimum: -1 } -} -op { - name: "LogMatrixDeterminant" - input_arg { - name: "input" - type_attr: "T" - } - output_arg { - name: "sign" - type_attr: "T" + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } } - output_arg { - name: "log_abs_determinant" - type_attr: "T" + attr { + name: "num_shards" + type: "int" } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "LogSoftmax" + name: "LoadTPUEmbeddingAdadeltaParametersGradAccumDebug" input_arg { - name: "logits" - type_attr: "T" + name: "parameters" + type: DT_FLOAT } - output_arg { - name: "logsoftmax" - type_attr: "T" + input_arg { + name: "accumulators" + type: DT_FLOAT } - attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - } - } + input_arg { + name: "updates" + 
type: DT_FLOAT } -} -op { - name: "LogSoftmax" input_arg { - name: "logits" - type_attr: "T" + name: "gradient_accumulators" + type: DT_FLOAT } - output_arg { - name: "logsoftmax" - type_attr: "T" + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_BFLOAT16 - type: DT_FLOAT - type: DT_DOUBLE - } + name: "table_name" + type: "string" + default_value { + s: "" } } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true } op { - name: "LogUniformCandidateSampler" + name: "LoadTPUEmbeddingAdagradParameters" input_arg { - name: "true_classes" - type: DT_INT64 - } - output_arg { - name: "sampled_candidates" - type: DT_INT64 - } - output_arg { - name: "true_expected_count" + name: "parameters" type: DT_FLOAT } - output_arg { - name: "sampled_expected_count" + input_arg { + name: "accumulators" type: DT_FLOAT } attr { - name: "num_true" + name: "table_id" type: "int" + default_value { + i: -1 + } has_minimum: true - minimum: 1 + minimum: -1 } attr { - name: "num_sampled" - type: "int" - has_minimum: true - minimum: 1 + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "unique" - type: "bool" + name: "num_shards" + type: "int" } attr { - name: "range_max" + name: "shard_id" type: "int" - has_minimum: true - minimum: 1 + } + is_stateful: true +} +op { + name: "LoadTPUEmbeddingAdagradParametersGradAccumDebug" + input_arg { + name: "parameters" + type: DT_FLOAT + } + input_arg { + name: "accumulators" + type: DT_FLOAT + } + input_arg { + name: "gradient_accumulators" + type: DT_FLOAT } attr { - name: "seed" + name: "table_id" type: "int" default_value { - i: 0 + i: -1 } + has_minimum: true + minimum: -1 } attr { - name: "seed2" - type: "int" + name: "table_name" + type: "string" default_value { - i: 0 + s: "" } } + attr { + name: "num_shards" + 
type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true } op { - name: "LogUniformCandidateSampler" + name: "LoadTPUEmbeddingCenteredRMSPropParameters" input_arg { - name: "true_classes" - type: DT_INT64 + name: "parameters" + type: DT_FLOAT } - output_arg { - name: "sampled_candidates" - type: DT_INT64 + input_arg { + name: "ms" + type: DT_FLOAT } - output_arg { - name: "true_expected_count" + input_arg { + name: "mom" type: DT_FLOAT } - output_arg { - name: "sampled_expected_count" + input_arg { + name: "mg" type: DT_FLOAT } attr { - name: "num_true" + name: "table_id" type: "int" + default_value { + i: -1 + } has_minimum: true - minimum: 1 + minimum: -1 } attr { - name: "num_sampled" - type: "int" - has_minimum: true - minimum: 1 + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "unique" - type: "bool" + name: "num_shards" + type: "int" } attr { - name: "range_max" + name: "shard_id" type: "int" - has_minimum: true - minimum: 1 + } + is_stateful: true +} +op { + name: "LoadTPUEmbeddingFTRLParameters" + input_arg { + name: "parameters" + type: DT_FLOAT + } + input_arg { + name: "accumulators" + type: DT_FLOAT + } + input_arg { + name: "linears" + type: DT_FLOAT } attr { - name: "seed" + name: "table_id" type: "int" default_value { - i: 0 + i: -1 } + has_minimum: true + minimum: -1 } attr { - name: "seed2" - type: "int" + name: "table_name" + type: "string" default_value { - i: 0 + s: "" } } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } is_stateful: true } op { - name: "LogicalAnd" + name: "LoadTPUEmbeddingFTRLParametersGradAccumDebug" input_arg { - name: "x" - type: DT_BOOL + name: "parameters" + type: DT_FLOAT } input_arg { - name: "y" - type: DT_BOOL + name: "accumulators" + type: DT_FLOAT } - output_arg { - name: "z" - type: DT_BOOL + input_arg { + name: "linears" + type: DT_FLOAT } - is_commutative: true -} -op { - name: "LogicalNot" input_arg { - 
name: "x" - type: DT_BOOL + name: "gradient_accumulators" + type: DT_FLOAT } - output_arg { - name: "y" - type: DT_BOOL + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "LogicalOr" + name: "LoadTPUEmbeddingMDLAdagradLightParameters" input_arg { - name: "x" - type: DT_BOOL + name: "parameters" + type: DT_FLOAT } input_arg { - name: "y" - type: DT_BOOL + name: "accumulators" + type: DT_FLOAT } - output_arg { - name: "z" - type: DT_BOOL + input_arg { + name: "weights" + type: DT_FLOAT } - is_commutative: true -} -op { - name: "LookupTableExport" input_arg { - name: "table_handle" - type: DT_STRING - is_ref: true + name: "benefits" + type: DT_FLOAT } - output_arg { - name: "keys" - type_attr: "Tkeys" + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } - output_arg { - name: "values" - type_attr: "Tvalues" + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "Tkeys" - type: "type" + name: "num_shards" + type: "int" } attr { - name: "Tvalues" - type: "type" + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "LookupTableExportV2" + name: "LoadTPUEmbeddingMomentumParameters" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: "parameters" + type: DT_FLOAT } - output_arg { - name: "keys" - type_attr: "Tkeys" + input_arg { + name: "momenta" + type: DT_FLOAT } - output_arg { - name: "values" - type_attr: "Tvalues" + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tkeys" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "Tvalues" - type: "type" + name: "num_shards" + 
type: "int" + } + attr { + name: "shard_id" + type: "int" } is_stateful: true } op { - name: "LookupTableFind" + name: "LoadTPUEmbeddingMomentumParametersGradAccumDebug" input_arg { - name: "table_handle" - type: DT_STRING - is_ref: true + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "momenta" + type: DT_FLOAT } input_arg { - name: "default_value" - type_attr: "Tout" + name: "gradient_accumulators" + type: DT_FLOAT } - output_arg { - name: "values" - type_attr: "Tout" + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tin" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "Tout" - type: "type" + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "LookupTableFindV2" + name: "LoadTPUEmbeddingProximalAdagradParameters" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "accumulators" + type: DT_FLOAT } - input_arg { - name: "default_value" - type_attr: "Tout" + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } - output_arg { - name: "values" - type_attr: "Tout" + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "Tin" - type: "type" + name: "num_shards" + type: "int" } attr { - name: "Tout" - type: "type" + name: "shard_id" + type: "int" } is_stateful: true } op { - name: "LookupTableImport" + name: "LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug" input_arg { - name: "table_handle" - type: DT_STRING - is_ref: true + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "accumulators" + type: DT_FLOAT } input_arg { - name: "values" - type_attr: "Tout" + name: "gradient_accumulators" + type: DT_FLOAT 
} attr { - name: "Tin" - type: "type" + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tout" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "LookupTableImportV2" + name: "LoadTPUEmbeddingRMSPropParameters" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "ms" + type: DT_FLOAT } input_arg { - name: "values" - type_attr: "Tout" + name: "mom" + type: DT_FLOAT } attr { - name: "Tin" - type: "type" + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tout" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } is_stateful: true } op { - name: "LookupTableInsert" + name: "LoadTPUEmbeddingRMSPropParametersGradAccumDebug" input_arg { - name: "table_handle" - type: DT_STRING - is_ref: true + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "ms" + type: DT_FLOAT } input_arg { - name: "values" - type_attr: "Tout" + name: "mom" + type: DT_FLOAT + } + input_arg { + name: "gradient_accumulators" + type: DT_FLOAT } attr { - name: "Tin" - type: "type" + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tout" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "LookupTableInsertV2" + name: "LoadTPUEmbeddingStochasticGradientDescentParameters" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: 
"parameters" + type: DT_FLOAT } - input_arg { - name: "keys" - type_attr: "Tin" + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } - input_arg { - name: "values" - type_attr: "Tout" + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "Tin" - type: "type" + name: "num_shards" + type: "int" } attr { - name: "Tout" - type: "type" + name: "shard_id" + type: "int" } is_stateful: true } op { - name: "LookupTableRemoveV2" + name: "Log" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: "x" + type_attr: "T" } - input_arg { - name: "keys" - type_attr: "Tin" + output_arg { + name: "y" + type_attr: "T" } attr { - name: "Tin" + name: "T" type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } } - is_stateful: true } op { - name: "LookupTableSize" + name: "Log" input_arg { - name: "table_handle" - type: DT_STRING - is_ref: true + name: "x" + type_attr: "T" } output_arg { - name: "size" - type: DT_INT64 + name: "y" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } } } op { - name: "LookupTableSizeV2" + name: "Log" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: "x" + type_attr: "T" } output_arg { - name: "size" - type: DT_INT64 + name: "y" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } } - is_stateful: true } op { - name: "LoopCond" + name: "Log1p" input_arg { - name: "input" - type: DT_BOOL + name: "x" + type_attr: "T" } output_arg { - name: "output" - type: DT_BOOL + name: "y" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + 
list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } } } op { - name: "LowerBound" - input_arg { - name: "sorted_inputs" - type_attr: "T" - } + name: "Log1p" input_arg { - name: "values" + name: "x" type_attr: "T" } output_arg { - name: "output" - type_attr: "out_type" + name: "y" + type_attr: "T" } attr { name: "T" type: "type" - } - attr { - name: "out_type" - type: "type" - default_value { - type: DT_INT32 - } allowed_values { list { - type: DT_INT32 - type: DT_INT64 + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } } op { - name: "Lu" + name: "Log1p" input_arg { - name: "input" + name: "x" type_attr: "T" } output_arg { - name: "lu" + name: "y" type_attr: "T" } - output_arg { - name: "p" - type_attr: "output_idx_type" - } attr { name: "T" type: "type" allowed_values { list { - type: DT_DOUBLE + type: DT_BFLOAT16 + type: DT_HALF type: DT_FLOAT + type: DT_DOUBLE type: DT_COMPLEX64 type: DT_COMPLEX128 } } } +} +op { + name: "LogMatrixDeterminant" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "sign" + type_attr: "T" + } + output_arg { + name: "log_abs_determinant" + type_attr: "T" + } attr { - name: "output_idx_type" + name: "T" type: "type" - default_value { - type: DT_INT32 - } allowed_values { list { - type: DT_INT32 - type: DT_INT64 + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } } op { - name: "Lu" + name: "LogMatrixDeterminant" input_arg { name: "input" type_attr: "T" } output_arg { - name: "lu" + name: "sign" type_attr: "T" } output_arg { - name: "p" - type_attr: "output_idx_type" + name: "log_abs_determinant" + type_attr: "T" } attr { name: "T" type: "type" allowed_values { list { - type: DT_DOUBLE - type: DT_FLOAT type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE type: DT_COMPLEX64 type: DT_COMPLEX128 } } } +} +op { + name: "LogSoftmax" + input_arg { + name: "logits" + 
type_attr: "T" + } + output_arg { + name: "logsoftmax" + type_attr: "T" + } attr { - name: "output_idx_type" + name: "T" type: "type" - default_value { - type: DT_INT32 - } allowed_values { list { - type: DT_INT32 - type: DT_INT64 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE } } } } op { - name: "MakeIterator" + name: "LogSoftmax" input_arg { - name: "dataset" - type: DT_VARIANT + name: "logits" + type_attr: "T" } - input_arg { - name: "iterator" - type: DT_RESOURCE + output_arg { + name: "logsoftmax" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + } + } } - is_stateful: true } op { - name: "MapClear" + name: "LogUniformCandidateSampler" + input_arg { + name: "true_classes" + type: DT_INT64 + } + output_arg { + name: "sampled_candidates" + type: DT_INT64 + } + output_arg { + name: "true_expected_count" + type: DT_FLOAT + } + output_arg { + name: "sampled_expected_count" + type: DT_FLOAT + } attr { - name: "capacity" + name: "num_true" type: "int" - default_value { - i: 0 - } has_minimum: true + minimum: 1 } attr { - name: "memory_limit" + name: "num_sampled" type: "int" - default_value { - i: 0 - } has_minimum: true + minimum: 1 } attr { - name: "dtypes" - type: "list(type)" + name: "unique" + type: "bool" } attr { - name: "container" - type: "string" + name: "range_max" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "seed" + type: "int" default_value { - s: "" + i: 0 } } attr { - name: "shared_name" - type: "string" + name: "seed2" + type: "int" default_value { - s: "" + i: 0 } } - is_stateful: true } op { - name: "MapDataset" - input_arg { - name: "input_dataset" - type: DT_VARIANT - } + name: "LogUniformCandidateSampler" input_arg { - name: "other_arguments" - type_list_attr: "Targuments" + name: "true_classes" + type: DT_INT64 } output_arg { - name: "handle" - type: DT_VARIANT + name: "sampled_candidates" + type: DT_INT64 } - 
attr { - name: "f" - type: "func" + output_arg { + name: "true_expected_count" + type: DT_FLOAT + } + output_arg { + name: "sampled_expected_count" + type: DT_FLOAT } attr { - name: "Targuments" - type: "list(type)" + name: "num_true" + type: "int" has_minimum: true + minimum: 1 } attr { - name: "output_types" - type: "list(type)" + name: "num_sampled" + type: "int" has_minimum: true minimum: 1 } attr { - name: "output_shapes" - type: "list(shape)" + name: "unique" + type: "bool" + } + attr { + name: "range_max" + type: "int" has_minimum: true minimum: 1 } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } is_stateful: true } op { - name: "MapDataset" + name: "LogicalAnd" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "x" + type: DT_BOOL } input_arg { - name: "other_arguments" - type_list_attr: "Targuments" + name: "y" + type: DT_BOOL } output_arg { - name: "handle" - type: DT_VARIANT + name: "z" + type: DT_BOOL + } + is_commutative: true +} +op { + name: "LogicalNot" + input_arg { + name: "x" + type: DT_BOOL + } + output_arg { + name: "y" + type: DT_BOOL + } +} +op { + name: "LogicalOr" + input_arg { + name: "x" + type: DT_BOOL + } + input_arg { + name: "y" + type: DT_BOOL + } + output_arg { + name: "z" + type: DT_BOOL + } + is_commutative: true +} +op { + name: "LookupTableExport" + input_arg { + name: "table_handle" + type: DT_STRING + is_ref: true + } + output_arg { + name: "keys" + type_attr: "Tkeys" + } + output_arg { + name: "values" + type_attr: "Tvalues" } attr { - name: "f" - type: "func" + name: "Tkeys" + type: "type" } attr { - name: "Targuments" - type: "list(type)" - has_minimum: true + name: "Tvalues" + type: "type" + } +} +op { + name: "LookupTableExportV2" + input_arg { + name: "table_handle" + type: DT_RESOURCE + } + output_arg { + name: "keys" + type_attr: "Tkeys" + } + output_arg { + name: "values" + type_attr: "Tvalues" } attr { - name: 
"output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "Tkeys" + type: "type" } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "Tvalues" + type: "type" } + is_stateful: true } op { - name: "MapDataset" + name: "LookupTableFind" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "table_handle" + type: DT_STRING + is_ref: true } input_arg { - name: "other_arguments" - type_list_attr: "Targuments" + name: "keys" + type_attr: "Tin" + } + input_arg { + name: "default_value" + type_attr: "Tout" } output_arg { - name: "handle" - type: DT_VARIANT + name: "values" + type_attr: "Tout" } attr { - name: "f" - type: "func" + name: "Tin" + type: "type" } attr { - name: "Targuments" - type: "list(type)" - has_minimum: true + name: "Tout" + type: "type" } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 +} +op { + name: "LookupTableFindV2" + input_arg { + name: "table_handle" + type: DT_RESOURCE + } + input_arg { + name: "keys" + type_attr: "Tin" + } + input_arg { + name: "default_value" + type_attr: "Tout" + } + output_arg { + name: "values" + type_attr: "Tout" } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "Tin" + type: "type" } attr { - name: "use_inter_op_parallelism" - type: "bool" - default_value { - b: true - } + name: "Tout" + type: "type" } + is_stateful: true } op { - name: "MapDataset" + name: "LookupTableImport" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "table_handle" + type: DT_STRING + is_ref: true } input_arg { - name: "other_arguments" - type_list_attr: "Targuments" + name: "keys" + type_attr: "Tin" } - output_arg { - name: "handle" - type: DT_VARIANT + input_arg { + name: "values" + type_attr: "Tout" } attr { - name: "f" - type: "func" + name: "Tin" + type: "type" } attr { - name: "Targuments" - type: "list(type)" - has_minimum: true + name: "Tout" + type: "type" + } +} +op { + 
name: "LookupTableImportV2" + input_arg { + name: "table_handle" + type: DT_RESOURCE + } + input_arg { + name: "keys" + type_attr: "Tin" + } + input_arg { + name: "values" + type_attr: "Tout" } attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "Tin" + type: "type" } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "Tout" + type: "type" + } + is_stateful: true +} +op { + name: "LookupTableInsert" + input_arg { + name: "table_handle" + type: DT_STRING + is_ref: true + } + input_arg { + name: "keys" + type_attr: "Tin" + } + input_arg { + name: "values" + type_attr: "Tout" } attr { - name: "use_inter_op_parallelism" - type: "bool" - default_value { - b: true - } + name: "Tin" + type: "type" } attr { - name: "preserve_cardinality" - type: "bool" - default_value { - b: false - } + name: "Tout" + type: "type" } } op { - name: "MapDefun" + name: "LookupTableInsertV2" input_arg { - name: "arguments" - type_list_attr: "Targuments" + name: "table_handle" + type: DT_RESOURCE } - output_arg { - name: "output" - type_list_attr: "output_types" + input_arg { + name: "keys" + type_attr: "Tin" } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true - minimum: 1 + input_arg { + name: "values" + type_attr: "Tout" } attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "Tin" + type: "type" } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "Tout" + type: "type" + } + is_stateful: true +} +op { + name: "LookupTableRemoveV2" + input_arg { + name: "table_handle" + type: DT_RESOURCE + } + input_arg { + name: "keys" + type_attr: "Tin" } attr { - name: "f" - type: "func" + name: "Tin" + type: "type" } + is_stateful: true } op { - name: "MapDefun" + name: "LookupTableSize" input_arg { - name: "arguments" - type_list_attr: "Targuments" + name: "table_handle" + type: DT_STRING + is_ref: true + } + output_arg { 
+ name: "size" + type: DT_INT64 } +} +op { + name: "LookupTableSizeV2" input_arg { - name: "captured_inputs" - type_list_attr: "Tcaptured" + name: "table_handle" + type: DT_RESOURCE + } + output_arg { + name: "size" + type: DT_INT64 + } + is_stateful: true +} +op { + name: "LoopCond" + input_arg { + name: "input" + type: DT_BOOL } output_arg { name: "output" - type_list_attr: "output_types" + type: DT_BOOL + } +} +op { + name: "LowerBound" + input_arg { + name: "sorted_inputs" + type_attr: "T" + } + input_arg { + name: "values" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "out_type" } attr { - name: "Targuments" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "T" + type: "type" } attr { - name: "Tcaptured" - type: "list(type)" + name: "out_type" + type: "type" default_value { + type: DT_INT32 + } + allowed_values { list { + type: DT_INT32 + type: DT_INT64 } } - has_minimum: true } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 +} +op { + name: "Lu" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "lu" + type_attr: "T" + } + output_arg { + name: "p" + type_attr: "output_idx_type" } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "T" + type: "type" + allowed_values { + list { + type: DT_DOUBLE + type: DT_FLOAT + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } } attr { - name: "f" - type: "func" + name: "output_idx_type" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } } } op { - name: "MapIncompleteSize" + name: "Lu" + input_arg { + name: "input" + type_attr: "T" + } output_arg { - name: "size" - type: DT_INT32 + name: "lu" + type_attr: "T" + } + output_arg { + name: "p" + type_attr: "output_idx_type" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_DOUBLE + type: DT_FLOAT + type: DT_HALF + type: DT_COMPLEX64 + type: 
DT_COMPLEX128 + } + } + } + attr { + name: "output_idx_type" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } +} +op { + name: "MakeIterator" + input_arg { + name: "dataset" + type: DT_VARIANT + } + input_arg { + name: "iterator" + type: DT_RESOURCE } + is_stateful: true +} +op { + name: "MapClear" attr { name: "capacity" type: "int" @@ -32971,21 +33719,308 @@ op { is_stateful: true } op { - name: "MapPeek" + name: "MapDataset" input_arg { - name: "key" - type: DT_INT64 + name: "input_dataset" + type: DT_VARIANT } input_arg { - name: "indices" - type: DT_INT32 + name: "other_arguments" + type_list_attr: "Targuments" } output_arg { - name: "values" - type_list_attr: "dtypes" + name: "handle" + type: DT_VARIANT } attr { - name: "capacity" + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "MapDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} +op { + name: "MapDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + 
name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "use_inter_op_parallelism" + type: "bool" + default_value { + b: true + } + } +} +op { + name: "MapDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "f" + type: "func" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "use_inter_op_parallelism" + type: "bool" + default_value { + b: true + } + } + attr { + name: "preserve_cardinality" + type: "bool" + default_value { + b: false + } + } +} +op { + name: "MapDefun" + input_arg { + name: "arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "output" + type_list_attr: "output_types" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "f" + type: "func" + } +} +op { + name: "MapDefun" + input_arg { + name: "arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "captured_inputs" + type_list_attr: "Tcaptured" + } + output_arg { + name: "output" + type_list_attr: "output_types" + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Tcaptured" + type: "list(type)" + default_value { + list { + } + } + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: 
"output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "f" + type: "func" + } +} +op { + name: "MapIncompleteSize" + output_arg { + name: "size" + type: DT_INT32 + } + attr { + name: "capacity" + type: "int" + default_value { + i: 0 + } + has_minimum: true + } + attr { + name: "memory_limit" + type: "int" + default_value { + i: 0 + } + has_minimum: true + } + attr { + name: "dtypes" + type: "list(type)" + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} +op { + name: "MapPeek" + input_arg { + name: "key" + type: DT_INT64 + } + input_arg { + name: "indices" + type: DT_INT32 + } + output_arg { + name: "values" + type_list_attr: "dtypes" + } + attr { + name: "capacity" type: "int" default_value { i: 0 @@ -40866,6 +41901,80 @@ op { } is_stateful: true } +op { + name: "OutfeedDequeue" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "OutfeedDequeueTuple" + output_arg { + name: "outputs" + type_list_attr: "dtypes" + } + attr { + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "shapes" + type: "list(shape)" + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "OutfeedEnqueue" + input_arg { + name: "input" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + is_stateful: true +} +op { + name: "OutfeedEnqueueTuple" + input_arg { + name: "inputs" + type_list_attr: "dtypes" + } + attr { + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} op { name: "Pack" input_arg { @@ -49710,6 +50819,25 @@ op { } is_stateful: true } 
+op { + name: "RecvTPUEmbeddingActivations" + output_arg { + name: "outputs" + type: DT_FLOAT + number_attr: "num_outputs" + } + attr { + name: "num_outputs" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "config" + type: "string" + } + is_stateful: true +} op { name: "ReduceDataset" input_arg { @@ -59631,6 +60759,690 @@ op { } is_stateful: true } +op { + name: "RetrieveTPUEmbeddingADAMParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "momenta" + type: DT_FLOAT + } + output_arg { + name: "velocities" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingADAMParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "momenta" + type: DT_FLOAT + } + output_arg { + name: "velocities" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingAdadeltaParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "updates" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" 
+ type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "updates" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingAdagradParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingAdagradParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingCenteredRMSPropParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "ms" + type: DT_FLOAT + } + output_arg { + name: "mom" + type: DT_FLOAT + } + output_arg { + name: "mg" + type: DT_FLOAT + } + attr { + name: "table_id" + type: 
"int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingFTRLParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "linears" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingFTRLParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "linears" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingMDLAdagradLightParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "weights" + type: DT_FLOAT + } + output_arg { + name: "benefits" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: 
true +} +op { + name: "RetrieveTPUEmbeddingMomentumParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "momenta" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingMomentumParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "momenta" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingProximalAdagradParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: 
"num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingRMSPropParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "ms" + type: DT_FLOAT + } + output_arg { + name: "mom" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "ms" + type: DT_FLOAT + } + output_arg { + name: "mom" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingStochasticGradientDescentParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} op { name: "Reverse" input_arg { @@ -64624,6 +66436,38 @@ op { } } } +op { + name: "SendTPUEmbeddingGradients" + input_arg { + name: "inputs" + type: DT_FLOAT + number_attr: "N" + } + input_arg { + name: "learning_rates" + type: DT_FLOAT + number_attr: "NN" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + 
name: "NN" + type: "int" + default_value { + i: 0 + } + has_minimum: true + } + attr { + name: "config" + type: "string" + } + is_stateful: true +} op { name: "SerializeIterator" input_arg { @@ -65091,6 +66935,10 @@ op { minimum: 1 } } +op { + name: "ShutdownDistributedTPU" + is_stateful: true +} op { name: "Sigmoid" input_arg { @@ -77766,6 +79614,285 @@ op { } is_stateful: true } +op { + name: "TPUCompilationResult" + output_arg { + name: "output" + type: DT_STRING + } +} +op { + name: "TPUEmbeddingActivations" + input_arg { + name: "embedding_variable" + type: DT_FLOAT + } + input_arg { + name: "sliced_activations" + type: DT_FLOAT + } + output_arg { + name: "output" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + has_minimum: true + } + attr { + name: "lookup_id" + type: "int" + has_minimum: true + } +} +op { + name: "TPUOrdinalSelector" + output_arg { + name: "device_ordinals" + type: DT_INT32 + } + is_stateful: true +} +op { + name: "TPUPartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + input_arg { + name: "device_ordinal" + type: DT_INT32 + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } +} +op { + name: "TPUReplicate" + input_arg { + name: "inputs" + type_list_attr: "Tinputs" + } + input_arg { + name: "broadcast_inputs" + type_list_attr: "Tbroadcast_inputs" + } + input_arg { + name: "variables" + type: DT_RESOURCE + number_attr: "NumVariables" + } + input_arg { + name: "guaranteed_constants" + type_list_attr: "Tguaranteed_constants" + } + output_arg { + name: "outputs" + type_list_attr: "output_types" + } + attr { + name: "computation" + type: "func" + } + attr { + name: "num_replicas" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "num_cores_per_replica" + type: "int" + default_value { + i: 1 + } + } + attr 
{ + name: "topology" + type: "string" + default_value { + s: "" + } + } + attr { + name: "use_tpu" + type: "bool" + default_value { + b: true + } + } + attr { + name: "device_assignment" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "host_compute_core" + type: "list(string)" + default_value { + list { + } + } + } + attr { + name: "Tinputs" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tbroadcast_inputs" + type: "list(type)" + has_minimum: true + } + attr { + name: "NumVariables" + type: "int" + has_minimum: true + } + attr { + name: "Tguaranteed_constants" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + } + attr { + name: "padding_map" + type: "list(string)" + default_value { + list { + } + } + } + is_stateful: true +} +op { + name: "TPUReplicateMetadata" + attr { + name: "num_replicas" + type: "int" + has_minimum: true + } + attr { + name: "num_cores_per_replica" + type: "int" + default_value { + i: 1 + } + } + attr { + name: "topology" + type: "string" + default_value { + s: "" + } + } + attr { + name: "use_tpu" + type: "bool" + default_value { + b: true + } + } + attr { + name: "device_assignment" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "computation_shape" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "host_compute_core" + type: "list(string)" + default_value { + list { + } + } + } + attr { + name: "padding_map" + type: "list(string)" + default_value { + list { + } + } + } +} +op { + name: "TPUReplicatedInput" + input_arg { + name: "inputs" + type_attr: "T" + number_attr: "N" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "T" + type: "type" + } +} +op { + name: "TPUReplicatedOutput" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "outputs" + type_attr: "T" + 
number_attr: "num_replicas" + } + attr { + name: "num_replicas" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "T" + type: "type" + } +} op { name: "TakeDataset" input_arg { @@ -83067,6 +85194,18 @@ op { minimum: 1 } } +op { + name: "WorkerHeartbeat" + input_arg { + name: "request" + type: DT_STRING + } + output_arg { + name: "response" + type: DT_STRING + } + is_stateful: true +} op { name: "WrapDatasetVariant" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 607e6ee86c..20cc2596d9 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -651,6 +651,43 @@ op { } is_stateful: true } +op { + name: "AllToAll" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "group_assignment" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_FLOAT + } + } + } + attr { + name: "concat_dimension" + type: "int" + } + attr { + name: "split_dimension" + type: "int" + } + attr { + name: "split_count" + type: "int" + } +} op { name: "Angle" input_arg { @@ -5478,6 +5515,46 @@ op { } is_stateful: true } +op { + name: "CollectivePermute" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "source_target_pairs" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} op { name: "CollectiveReduce" input_arg { @@ -5929,6 +6006,35 @@ op { } is_stateful: true } +op { + name: "ConfigureDistributedTPU" + output_arg { + name: "topology" + type: DT_STRING + } 
+ attr { + name: "embedding_config" + type: "string" + default_value { + s: "" + } + } + attr { + name: "tpu_embedding_config" + type: "string" + default_value { + s: "" + } + } + attr { + name: "is_global_init" + type: "bool" + default_value { + b: false + } + } + is_stateful: true +} op { name: "Conj" input_arg { @@ -7012,6 +7118,31 @@ op { } } } +op { + name: "CrossReplicaSum" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "group_assignment" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_FLOAT + } + } + } +} op { name: "CudnnRNN" input_arg { @@ -10242,6 +10373,124 @@ op { type: DT_STRING } } +op { + name: "EnqueueTPUEmbeddingIntegerBatch" + input_arg { + name: "batch" + type: DT_INT32 + number_attr: "N" + } + input_arg { + name: "mode_override" + type: DT_STRING + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "EnqueueTPUEmbeddingSparseBatch" + input_arg { + name: "sample_indices" + type: DT_INT32 + number_attr: "N" + } + input_arg { + name: "embedding_indices" + type: DT_INT32 + number_attr: "N" + } + input_arg { + name: "aggregation_weights" + type: DT_FLOAT + number_attr: "N" + } + input_arg { + name: "mode_override" + type: DT_STRING + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + attr { + name: "combiners" + type: "list(string)" + default_value { + list { + } + } + } + is_stateful: true +} +op { + name: "EnqueueTPUEmbeddingSparseTensorBatch" + input_arg { + name: "sample_indices" + type: DT_INT32 + number_attr: "N" + } + input_arg { + name: "embedding_indices" + type: DT_INT32 + number_attr: "N" + } + input_arg { + name: "aggregation_weights" + type: DT_FLOAT 
+ number_attr: "N" + } + input_arg { + name: "mode_override" + type: DT_STRING + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + attr { + name: "combiners" + type: "list(string)" + default_value { + list { + } + } + } + attr { + name: "table_ids" + type: "list(int)" + } + is_stateful: true +} op { name: "EnsureShape" input_arg { @@ -14700,6 +14949,108 @@ op { } } } +op { + name: "InfeedDequeue" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + is_stateful: true +} +op { + name: "InfeedDequeueTuple" + output_arg { + name: "outputs" + type_list_attr: "dtypes" + } + attr { + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "shapes" + type: "list(shape)" + } + is_stateful: true +} +op { + name: "InfeedEnqueue" + input_arg { + name: "input" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + default_value { + shape { + } + } + } + attr { + name: "layout" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "InfeedEnqueueTuple" + input_arg { + name: "inputs" + type_list_attr: "dtypes" + } + attr { + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "shapes" + type: "list(shape)" + } + attr { + name: "layouts" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} op { name: "InitializeTable" input_arg { @@ -15933,1268 +16284,1185 @@ op { is_stateful: true } op { - name: "Log" + name: "LoadTPUEmbeddingADAMParameters" input_arg { - name: "x" - type_attr: "T" - } - output_arg { - name: "y" - type_attr: 
"T" - } - attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_BFLOAT16 - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } + name: "parameters" + type: DT_FLOAT } -} -op { - name: "Log1p" input_arg { - name: "x" - type_attr: "T" + name: "momenta" + type: DT_FLOAT } - output_arg { - name: "y" - type_attr: "T" + input_arg { + name: "velocities" + type: DT_FLOAT } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_BFLOAT16 - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } + name: "table_id" + type: "int" + default_value { + i: -1 } - } -} -op { - name: "LogMatrixDeterminant" - input_arg { - name: "input" - type_attr: "T" - } - output_arg { - name: "sign" - type_attr: "T" - } - output_arg { - name: "log_abs_determinant" - type_attr: "T" + has_minimum: true + minimum: -1 } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } + name: "table_name" + type: "string" + default_value { + s: "" } } -} -op { - name: "LogSoftmax" - input_arg { - name: "logits" - type_attr: "T" - } - output_arg { - name: "logsoftmax" - type_attr: "T" + attr { + name: "num_shards" + type: "int" } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_BFLOAT16 - type: DT_FLOAT - type: DT_DOUBLE - } - } + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "LogUniformCandidateSampler" + name: "LoadTPUEmbeddingADAMParametersGradAccumDebug" input_arg { - name: "true_classes" - type: DT_INT64 + name: "parameters" + type: DT_FLOAT } - output_arg { - name: "sampled_candidates" - type: DT_INT64 + input_arg { + name: "momenta" + type: DT_FLOAT } - output_arg { - name: "true_expected_count" + input_arg { + name: "velocities" type: DT_FLOAT } - output_arg { - name: "sampled_expected_count" + input_arg { + name: 
"gradient_accumulators" type: DT_FLOAT } attr { - name: "num_true" + name: "table_id" type: "int" + default_value { + i: -1 + } has_minimum: true - minimum: 1 + minimum: -1 } attr { - name: "num_sampled" - type: "int" - has_minimum: true - minimum: 1 + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "unique" - type: "bool" + name: "num_shards" + type: "int" } attr { - name: "range_max" + name: "shard_id" type: "int" - has_minimum: true - minimum: 1 + } + is_stateful: true +} +op { + name: "LoadTPUEmbeddingAdadeltaParameters" + input_arg { + name: "parameters" + type: DT_FLOAT + } + input_arg { + name: "accumulators" + type: DT_FLOAT + } + input_arg { + name: "updates" + type: DT_FLOAT } attr { - name: "seed" + name: "table_id" type: "int" default_value { - i: 0 + i: -1 } + has_minimum: true + minimum: -1 } attr { - name: "seed2" - type: "int" + name: "table_name" + type: "string" default_value { - i: 0 + s: "" } } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } is_stateful: true } op { - name: "LogicalAnd" + name: "LoadTPUEmbeddingAdadeltaParametersGradAccumDebug" input_arg { - name: "x" - type: DT_BOOL + name: "parameters" + type: DT_FLOAT } input_arg { - name: "y" - type: DT_BOOL + name: "accumulators" + type: DT_FLOAT } - output_arg { - name: "z" - type: DT_BOOL + input_arg { + name: "updates" + type: DT_FLOAT } - is_commutative: true -} -op { - name: "LogicalNot" input_arg { - name: "x" - type: DT_BOOL + name: "gradient_accumulators" + type: DT_FLOAT } - output_arg { - name: "y" - type: DT_BOOL + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } -} -op { - name: "LogicalOr" - input_arg { - name: "x" - type: DT_BOOL + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } } - input_arg { - name: "y" - type: DT_BOOL + attr { + name: "num_shards" + type: "int" } - output_arg { - name: "z" - type: DT_BOOL + 
attr { + name: "shard_id" + type: "int" } - is_commutative: true + is_stateful: true } op { - name: "LookupTableExport" + name: "LoadTPUEmbeddingAdagradParameters" input_arg { - name: "table_handle" - type: DT_STRING - is_ref: true + name: "parameters" + type: DT_FLOAT } - output_arg { - name: "keys" - type_attr: "Tkeys" + input_arg { + name: "accumulators" + type: DT_FLOAT } - output_arg { - name: "values" - type_attr: "Tvalues" + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tkeys" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "Tvalues" - type: "type" + name: "num_shards" + type: "int" } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true } op { - name: "LookupTableExportV2" + name: "LoadTPUEmbeddingAdagradParametersGradAccumDebug" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: "parameters" + type: DT_FLOAT } - output_arg { - name: "keys" - type_attr: "Tkeys" + input_arg { + name: "accumulators" + type: DT_FLOAT } - output_arg { - name: "values" - type_attr: "Tvalues" + input_arg { + name: "gradient_accumulators" + type: DT_FLOAT } attr { - name: "Tkeys" - type: "type" + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tvalues" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } is_stateful: true } op { - name: "LookupTableFind" + name: "LoadTPUEmbeddingCenteredRMSPropParameters" input_arg { - name: "table_handle" - type: DT_STRING - is_ref: true + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "ms" + type: DT_FLOAT } input_arg { - name: "default_value" - type_attr: "Tout" + name: "mom" + type: DT_FLOAT } - output_arg { - name: "values" - type_attr: "Tout" + input_arg { + 
name: "mg" + type: DT_FLOAT } attr { - name: "Tin" - type: "type" + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tout" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true } op { - name: "LookupTableFindV2" + name: "LoadTPUEmbeddingFTRLParameters" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "accumulators" + type: DT_FLOAT } input_arg { - name: "default_value" - type_attr: "Tout" + name: "linears" + type: DT_FLOAT } - output_arg { - name: "values" - type_attr: "Tout" + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tin" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "Tout" - type: "type" + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } is_stateful: true } op { - name: "LookupTableImport" + name: "LoadTPUEmbeddingFTRLParametersGradAccumDebug" input_arg { - name: "table_handle" - type: DT_STRING - is_ref: true + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "accumulators" + type: DT_FLOAT } input_arg { - name: "values" - type_attr: "Tout" + name: "linears" + type: DT_FLOAT + } + input_arg { + name: "gradient_accumulators" + type: DT_FLOAT } attr { - name: "Tin" - type: "type" + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tout" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "LookupTableImportV2" + name: 
"LoadTPUEmbeddingMDLAdagradLightParameters" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "accumulators" + type: DT_FLOAT } input_arg { - name: "values" - type_attr: "Tout" + name: "weights" + type: DT_FLOAT + } + input_arg { + name: "benefits" + type: DT_FLOAT } attr { - name: "Tin" - type: "type" + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tout" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } is_stateful: true } op { - name: "LookupTableInsert" + name: "LoadTPUEmbeddingMomentumParameters" input_arg { - name: "table_handle" - type: DT_STRING - is_ref: true + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "momenta" + type: DT_FLOAT } - input_arg { - name: "values" - type_attr: "Tout" + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tin" - type: "type" + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "Tout" - type: "type" + name: "num_shards" + type: "int" } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true } op { - name: "LookupTableInsertV2" + name: "LoadTPUEmbeddingMomentumParametersGradAccumDebug" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "momenta" + type: DT_FLOAT } input_arg { - name: "values" - type_attr: "Tout" + name: "gradient_accumulators" + type: DT_FLOAT } attr { - name: "Tin" - type: "type" + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "Tout" - type: "type" + name: "table_name" + type: "string" + default_value { 
+ s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } is_stateful: true } op { - name: "LookupTableRemoveV2" + name: "LoadTPUEmbeddingProximalAdagradParameters" input_arg { - name: "table_handle" - type: DT_RESOURCE + name: "parameters" + type: DT_FLOAT } input_arg { - name: "keys" - type_attr: "Tin" + name: "accumulators" + type: DT_FLOAT } attr { - name: "Tin" - type: "type" - } - is_stateful: true -} -op { - name: "LookupTableSize" - input_arg { - name: "table_handle" - type: DT_STRING - is_ref: true - } - output_arg { - name: "size" - type: DT_INT64 - } -} -op { - name: "LookupTableSizeV2" - input_arg { - name: "table_handle" - type: DT_RESOURCE - } - output_arg { - name: "size" - type: DT_INT64 - } - is_stateful: true -} -op { - name: "LoopCond" - input_arg { - name: "input" - type: DT_BOOL - } - output_arg { - name: "output" - type: DT_BOOL - } -} -op { - name: "LowerBound" - input_arg { - name: "sorted_inputs" - type_attr: "T" - } - input_arg { - name: "values" - type_attr: "T" - } - output_arg { - name: "output" - type_attr: "out_type" - } - attr { - name: "T" - type: "type" + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 } attr { - name: "out_type" - type: "type" + name: "table_name" + type: "string" default_value { - type: DT_INT32 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } + s: "" } } -} -op { - name: "Lu" - input_arg { - name: "input" - type_attr: "T" - } - output_arg { - name: "lu" - type_attr: "T" - } - output_arg { - name: "p" - type_attr: "output_idx_type" - } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_DOUBLE - type: DT_FLOAT - type: DT_HALF - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } + name: "num_shards" + type: "int" } attr { - name: "output_idx_type" - type: "type" - default_value { - type: DT_INT32 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } + name: 
"shard_id" + type: "int" } + is_stateful: true } op { - name: "MakeIterator" + name: "LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug" input_arg { - name: "dataset" - type: DT_VARIANT + name: "parameters" + type: DT_FLOAT } input_arg { - name: "iterator" - type: DT_RESOURCE + name: "accumulators" + type: DT_FLOAT } - is_stateful: true -} -op { - name: "MapClear" - attr { - name: "capacity" - type: "int" - default_value { - i: 0 - } - has_minimum: true + input_arg { + name: "gradient_accumulators" + type: DT_FLOAT } attr { - name: "memory_limit" + name: "table_id" type: "int" default_value { - i: 0 + i: -1 } has_minimum: true + minimum: -1 } attr { - name: "dtypes" - type: "list(type)" - } - attr { - name: "container" + name: "table_name" type: "string" default_value { s: "" } } attr { - name: "shared_name" - type: "string" - default_value { - s: "" - } + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } is_stateful: true } op { - name: "MapDataset" + name: "LoadTPUEmbeddingRMSPropParameters" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "parameters" + type: DT_FLOAT } input_arg { - name: "other_arguments" - type_list_attr: "Targuments" - } - output_arg { - name: "handle" - type: DT_VARIANT - } - attr { - name: "f" - type: "func" - } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true + name: "ms" + type: DT_FLOAT } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + input_arg { + name: "mom" + type: DT_FLOAT } attr { - name: "output_shapes" - type: "list(shape)" + name: "table_id" + type: "int" + default_value { + i: -1 + } has_minimum: true - minimum: 1 + minimum: -1 } attr { - name: "use_inter_op_parallelism" - type: "bool" + name: "table_name" + type: "string" default_value { - b: true + s: "" } } attr { - name: "preserve_cardinality" - type: "bool" - default_value { - b: false - } + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + 
type: "int" } + is_stateful: true } op { - name: "MapDefun" + name: "LoadTPUEmbeddingRMSPropParametersGradAccumDebug" input_arg { - name: "arguments" - type_list_attr: "Targuments" + name: "parameters" + type: DT_FLOAT } input_arg { - name: "captured_inputs" - type_list_attr: "Tcaptured" + name: "ms" + type: DT_FLOAT } - output_arg { - name: "output" - type_list_attr: "output_types" + input_arg { + name: "mom" + type: DT_FLOAT } - attr { - name: "Targuments" - type: "list(type)" - has_minimum: true - minimum: 1 + input_arg { + name: "gradient_accumulators" + type: DT_FLOAT } attr { - name: "Tcaptured" - type: "list(type)" + name: "table_id" + type: "int" default_value { - list { - } + i: -1 } has_minimum: true + minimum: -1 } attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "table_name" + type: "string" + default_value { + s: "" + } } attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "num_shards" + type: "int" } attr { - name: "f" - type: "func" + name: "shard_id" + type: "int" } + is_stateful: true } op { - name: "MapIncompleteSize" - output_arg { - name: "size" - type: DT_INT32 - } - attr { - name: "capacity" - type: "int" - default_value { - i: 0 - } - has_minimum: true + name: "LoadTPUEmbeddingStochasticGradientDescentParameters" + input_arg { + name: "parameters" + type: DT_FLOAT } attr { - name: "memory_limit" + name: "table_id" type: "int" default_value { - i: 0 + i: -1 } has_minimum: true + minimum: -1 } attr { - name: "dtypes" - type: "list(type)" - } - attr { - name: "container" + name: "table_name" type: "string" default_value { s: "" } } attr { - name: "shared_name" - type: "string" - default_value { - s: "" - } + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" } is_stateful: true } op { - name: "MapPeek" - input_arg { - name: "key" - type: DT_INT64 - } + name: "Log" input_arg { - name: "indices" - type: DT_INT32 + name: "x" + type_attr: 
"T" } output_arg { - name: "values" - type_list_attr: "dtypes" - } - attr { - name: "capacity" - type: "int" - default_value { - i: 0 - } - has_minimum: true + name: "y" + type_attr: "T" } attr { - name: "memory_limit" - type: "int" - default_value { - i: 0 + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } } - has_minimum: true } - attr { - name: "dtypes" - type: "list(type)" - has_minimum: true - minimum: 1 +} +op { + name: "Log1p" + input_arg { + name: "x" + type_attr: "T" } - attr { - name: "container" - type: "string" - default_value { - s: "" - } + output_arg { + name: "y" + type_attr: "T" } attr { - name: "shared_name" - type: "string" - default_value { - s: "" + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } } } - is_stateful: true } op { - name: "MapSize" + name: "LogMatrixDeterminant" + input_arg { + name: "input" + type_attr: "T" + } output_arg { - name: "size" - type: DT_INT32 + name: "sign" + type_attr: "T" } - attr { - name: "capacity" - type: "int" - default_value { - i: 0 - } - has_minimum: true + output_arg { + name: "log_abs_determinant" + type_attr: "T" } attr { - name: "memory_limit" - type: "int" - default_value { - i: 0 + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } } - has_minimum: true } - attr { - name: "dtypes" - type: "list(type)" +} +op { + name: "LogSoftmax" + input_arg { + name: "logits" + type_attr: "T" } - attr { - name: "container" - type: "string" - default_value { - s: "" - } + output_arg { + name: "logsoftmax" + type_attr: "T" } attr { - name: "shared_name" - type: "string" - default_value { - s: "" + name: "T" + type: "type" + allowed_values { + list { + type: DT_HALF + type: 
DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + } } } - is_stateful: true } op { - name: "MapStage" + name: "LogUniformCandidateSampler" input_arg { - name: "key" + name: "true_classes" type: DT_INT64 } - input_arg { - name: "indices" - type: DT_INT32 + output_arg { + name: "sampled_candidates" + type: DT_INT64 } - input_arg { - name: "values" - type_list_attr: "fake_dtypes" + output_arg { + name: "true_expected_count" + type: DT_FLOAT + } + output_arg { + name: "sampled_expected_count" + type: DT_FLOAT } attr { - name: "capacity" + name: "num_true" type: "int" - default_value { - i: 0 - } has_minimum: true + minimum: 1 } attr { - name: "memory_limit" + name: "num_sampled" type: "int" - default_value { - i: 0 - } has_minimum: true + minimum: 1 } attr { - name: "dtypes" - type: "list(type)" + name: "unique" + type: "bool" } attr { - name: "fake_dtypes" - type: "list(type)" + name: "range_max" + type: "int" has_minimum: true minimum: 1 } attr { - name: "container" - type: "string" + name: "seed" + type: "int" default_value { - s: "" + i: 0 } } attr { - name: "shared_name" - type: "string" + name: "seed2" + type: "int" default_value { - s: "" + i: 0 } } is_stateful: true } op { - name: "MapUnstage" + name: "LogicalAnd" input_arg { - name: "key" - type: DT_INT64 + name: "x" + type: DT_BOOL } input_arg { - name: "indices" - type: DT_INT32 + name: "y" + type: DT_BOOL } output_arg { - name: "values" - type_list_attr: "dtypes" + name: "z" + type: DT_BOOL } - attr { - name: "capacity" - type: "int" - default_value { - i: 0 - } - has_minimum: true + is_commutative: true +} +op { + name: "LogicalNot" + input_arg { + name: "x" + type: DT_BOOL } - attr { - name: "memory_limit" - type: "int" - default_value { - i: 0 - } - has_minimum: true + output_arg { + name: "y" + type: DT_BOOL } - attr { - name: "dtypes" - type: "list(type)" - has_minimum: true - minimum: 1 +} +op { + name: "LogicalOr" + input_arg { + name: "x" + type: DT_BOOL } - attr { - name: "container" - type: 
"string" - default_value { - s: "" - } + input_arg { + name: "y" + type: DT_BOOL } - attr { - name: "shared_name" - type: "string" - default_value { - s: "" - } + output_arg { + name: "z" + type: DT_BOOL } - is_stateful: true + is_commutative: true } op { - name: "MapUnstageNoKey" + name: "LookupTableExport" input_arg { - name: "indices" - type: DT_INT32 + name: "table_handle" + type: DT_STRING + is_ref: true } output_arg { - name: "key" - type: DT_INT64 + name: "keys" + type_attr: "Tkeys" } output_arg { name: "values" - type_list_attr: "dtypes" - } - attr { - name: "capacity" - type: "int" - default_value { - i: 0 - } - has_minimum: true - } - attr { - name: "memory_limit" - type: "int" - default_value { - i: 0 - } - has_minimum: true - } - attr { - name: "dtypes" - type: "list(type)" - has_minimum: true - minimum: 1 + type_attr: "Tvalues" } attr { - name: "container" - type: "string" - default_value { - s: "" - } + name: "Tkeys" + type: "type" } attr { - name: "shared_name" - type: "string" - default_value { - s: "" - } + name: "Tvalues" + type: "type" } - is_stateful: true } op { - name: "MatMul" - input_arg { - name: "a" - type_attr: "T" - } + name: "LookupTableExportV2" input_arg { - name: "b" - type_attr: "T" + name: "table_handle" + type: DT_RESOURCE } output_arg { - name: "product" - type_attr: "T" + name: "keys" + type_attr: "Tkeys" } - attr { - name: "transpose_a" - type: "bool" - default_value { - b: false - } + output_arg { + name: "values" + type_attr: "Tvalues" } attr { - name: "transpose_b" - type: "bool" - default_value { - b: false - } + name: "Tkeys" + type: "type" } attr { - name: "T" + name: "Tvalues" type: "type" - allowed_values { - list { - type: DT_BFLOAT16 - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT32 - type: DT_INT64 - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } } + is_stateful: true } op { - name: "MatchingFiles" + name: "LookupTableFind" input_arg { - name: "pattern" + name: "table_handle" type: DT_STRING + 
is_ref: true + } + input_arg { + name: "keys" + type_attr: "Tin" + } + input_arg { + name: "default_value" + type_attr: "Tout" } output_arg { - name: "filenames" - type: DT_STRING + name: "values" + type_attr: "Tout" + } + attr { + name: "Tin" + type: "type" + } + attr { + name: "Tout" + type: "type" } } op { - name: "MatrixBandPart" + name: "LookupTableFindV2" input_arg { - name: "input" - type_attr: "T" + name: "table_handle" + type: DT_RESOURCE } input_arg { - name: "num_lower" - type_attr: "Tindex" + name: "keys" + type_attr: "Tin" } input_arg { - name: "num_upper" - type_attr: "Tindex" + name: "default_value" + type_attr: "Tout" } output_arg { - name: "band" - type_attr: "T" + name: "values" + type_attr: "Tout" } attr { - name: "T" + name: "Tin" type: "type" } attr { - name: "Tindex" + name: "Tout" type: "type" - default_value { - type: DT_INT64 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } } + is_stateful: true } op { - name: "MatrixDeterminant" + name: "LookupTableImport" input_arg { - name: "input" - type_attr: "T" + name: "table_handle" + type: DT_STRING + is_ref: true } - output_arg { - name: "output" - type_attr: "T" + input_arg { + name: "keys" + type_attr: "Tin" + } + input_arg { + name: "values" + type_attr: "Tout" } attr { - name: "T" + name: "Tin" + type: "type" + } + attr { + name: "Tout" type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_FLOAT - type: DT_DOUBLE - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } } } op { - name: "MatrixDiag" + name: "LookupTableImportV2" input_arg { - name: "diagonal" - type_attr: "T" + name: "table_handle" + type: DT_RESOURCE } - output_arg { - name: "output" - type_attr: "T" + input_arg { + name: "keys" + type_attr: "Tin" + } + input_arg { + name: "values" + type_attr: "Tout" } attr { - name: "T" + name: "Tin" type: "type" } + attr { + name: "Tout" + type: "type" + } + is_stateful: true } op { - name: "MatrixDiagPart" + name: "LookupTableInsert" input_arg { - name: 
"input" - type_attr: "T" + name: "table_handle" + type: DT_STRING + is_ref: true } - output_arg { - name: "diagonal" - type_attr: "T" + input_arg { + name: "keys" + type_attr: "Tin" + } + input_arg { + name: "values" + type_attr: "Tout" } attr { - name: "T" + name: "Tin" + type: "type" + } + attr { + name: "Tout" type: "type" } } op { - name: "MatrixExponential" + name: "LookupTableInsertV2" input_arg { - name: "input" - type_attr: "T" + name: "table_handle" + type: DT_RESOURCE } - output_arg { - name: "output" - type_attr: "T" + input_arg { + name: "keys" + type_attr: "Tin" + } + input_arg { + name: "values" + type_attr: "Tout" } attr { - name: "T" + name: "Tin" type: "type" - allowed_values { - list { - type: DT_DOUBLE - type: DT_FLOAT - type: DT_HALF - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } } - deprecation { - version: 27 - explanation: "Use Python implementation tf.linalg.matrix_exponential instead." + attr { + name: "Tout" + type: "type" } + is_stateful: true } op { - name: "MatrixInverse" + name: "LookupTableRemoveV2" input_arg { - name: "input" - type_attr: "T" - } - output_arg { - name: "output" - type_attr: "T" + name: "table_handle" + type: DT_RESOURCE } - attr { - name: "adjoint" - type: "bool" - default_value { - b: false - } + input_arg { + name: "keys" + type_attr: "Tin" } attr { - name: "T" + name: "Tin" type: "type" - allowed_values { - list { - type: DT_DOUBLE - type: DT_FLOAT - type: DT_HALF - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } } + is_stateful: true } op { - name: "MatrixLogarithm" + name: "LookupTableSize" input_arg { - name: "input" - type_attr: "T" + name: "table_handle" + type: DT_STRING + is_ref: true } output_arg { - name: "output" - type_attr: "T" - } - attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } - } + name: "size" + type: DT_INT64 } } op { - name: "MatrixSetDiag" + name: "LookupTableSizeV2" input_arg { - name: "input" - type_attr: "T" + name: 
"table_handle" + type: DT_RESOURCE } + output_arg { + name: "size" + type: DT_INT64 + } + is_stateful: true +} +op { + name: "LoopCond" input_arg { - name: "diagonal" - type_attr: "T" + name: "input" + type: DT_BOOL } output_arg { name: "output" - type_attr: "T" - } - attr { - name: "T" - type: "type" + type: DT_BOOL } } op { - name: "MatrixSolve" + name: "LowerBound" input_arg { - name: "matrix" + name: "sorted_inputs" type_attr: "T" } input_arg { - name: "rhs" + name: "values" type_attr: "T" } output_arg { name: "output" - type_attr: "T" + type_attr: "out_type" } attr { - name: "adjoint" - type: "bool" - default_value { - b: false - } + name: "T" + type: "type" } attr { - name: "T" + name: "out_type" type: "type" + default_value { + type: DT_INT32 + } allowed_values { list { - type: DT_DOUBLE - type: DT_FLOAT - type: DT_HALF - type: DT_COMPLEX64 - type: DT_COMPLEX128 + type: DT_INT32 + type: DT_INT64 } } } } op { - name: "MatrixSolveLs" + name: "Lu" input_arg { - name: "matrix" + name: "input" type_attr: "T" } - input_arg { - name: "rhs" + output_arg { + name: "lu" type_attr: "T" } - input_arg { - name: "l2_regularizer" - type: DT_DOUBLE - } output_arg { - name: "output" - type_attr: "T" + name: "p" + type_attr: "output_idx_type" } attr { name: "T" @@ -17210,549 +17478,487 @@ op { } } attr { - name: "fast" - type: "bool" + name: "output_idx_type" + type: "type" default_value { - b: true + type: DT_INT32 } - } -} -op { - name: "MatrixSquareRoot" - input_arg { - name: "input" - type_attr: "T" - } - output_arg { - name: "output" - type_attr: "T" - } - attr { - name: "T" - type: "type" allowed_values { list { - type: DT_DOUBLE - type: DT_FLOAT - type: DT_HALF - type: DT_COMPLEX64 - type: DT_COMPLEX128 + type: DT_INT32 + type: DT_INT64 } } } } op { - name: "MatrixTriangularSolve" + name: "MakeIterator" input_arg { - name: "matrix" - type_attr: "T" + name: "dataset" + type: DT_VARIANT } input_arg { - name: "rhs" - type_attr: "T" + name: "iterator" + type: DT_RESOURCE } 
- output_arg { - name: "output" - type_attr: "T" + is_stateful: true +} +op { + name: "MapClear" + attr { + name: "capacity" + type: "int" + default_value { + i: 0 + } + has_minimum: true } attr { - name: "lower" - type: "bool" + name: "memory_limit" + type: "int" default_value { - b: true + i: 0 } + has_minimum: true } attr { - name: "adjoint" - type: "bool" + name: "dtypes" + type: "list(type)" + } + attr { + name: "container" + type: "string" default_value { - b: false + s: "" } } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_DOUBLE - type: DT_FLOAT - type: DT_HALF - type: DT_COMPLEX64 - type: DT_COMPLEX128 - } + name: "shared_name" + type: "string" + default_value { + s: "" } } + is_stateful: true } op { - name: "Max" + name: "MapDataset" input_arg { - name: "input" - type_attr: "T" + name: "input_dataset" + type: DT_VARIANT } input_arg { - name: "reduction_indices" - type_attr: "Tidx" + name: "other_arguments" + type_list_attr: "Targuments" } output_arg { - name: "output" - type_attr: "T" + name: "handle" + type: DT_VARIANT } attr { - name: "keep_dims" - type: "bool" - default_value { - b: false - } + name: "f" + type: "func" } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT32 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 - type: DT_COMPLEX64 - type: DT_INT64 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 - type: DT_BFLOAT16 - type: DT_UINT16 - type: DT_COMPLEX128 - type: DT_HALF - type: DT_UINT32 - type: DT_UINT64 - } - } + name: "Targuments" + type: "list(type)" + has_minimum: true } attr { - name: "Tidx" - type: "type" + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "use_inter_op_parallelism" + type: "bool" default_value { - type: DT_INT32 + b: true } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } + 
} + attr { + name: "preserve_cardinality" + type: "bool" + default_value { + b: false } } } op { - name: "MaxPool" + name: "MapDefun" input_arg { - name: "input" - type_attr: "T" + name: "arguments" + type_list_attr: "Targuments" + } + input_arg { + name: "captured_inputs" + type_list_attr: "Tcaptured" } output_arg { name: "output" - type_attr: "T" + type_list_attr: "output_types" } attr { - name: "T" - type: "type" + name: "Targuments" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Tcaptured" + type: "list(type)" default_value { - type: DT_FLOAT - } - allowed_values { list { - type: DT_HALF - type: DT_BFLOAT16 - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT32 - type: DT_INT64 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 - type: DT_UINT16 - type: DT_QINT8 } } - } - attr { - name: "ksize" - type: "list(int)" has_minimum: true - minimum: 4 } attr { - name: "strides" - type: "list(int)" + name: "output_types" + type: "list(type)" has_minimum: true - minimum: 4 + minimum: 1 } attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } - } + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 } attr { - name: "data_format" - type: "string" - default_value { - s: "NHWC" - } - allowed_values { - list { - s: "NHWC" - s: "NCHW" - s: "NCHW_VECT_C" - } - } + name: "f" + type: "func" } } op { - name: "MaxPool3D" - input_arg { - name: "input" - type_attr: "T" - } + name: "MapIncompleteSize" output_arg { - name: "output" - type_attr: "T" + name: "size" + type: DT_INT32 } attr { - name: "ksize" - type: "list(int)" + name: "capacity" + type: "int" + default_value { + i: 0 + } has_minimum: true - minimum: 5 } attr { - name: "strides" - type: "list(int)" + name: "memory_limit" + type: "int" + default_value { + i: 0 + } has_minimum: true - minimum: 5 } attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } - } + name: "dtypes" + type: 
"list(type)" } attr { - name: "data_format" + name: "container" type: "string" default_value { - s: "NDHWC" - } - allowed_values { - list { - s: "NDHWC" - s: "NCDHW" - } + s: "" } } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_HALF - type: DT_BFLOAT16 - type: DT_FLOAT - } + name: "shared_name" + type: "string" + default_value { + s: "" } } + is_stateful: true } op { - name: "MaxPool3DGrad" - input_arg { - name: "orig_input" - type_attr: "TInput" - } + name: "MapPeek" input_arg { - name: "orig_output" - type_attr: "TInput" + name: "key" + type: DT_INT64 } input_arg { - name: "grad" - type_attr: "T" + name: "indices" + type: DT_INT32 } output_arg { - name: "output" - type_attr: "T" + name: "values" + type_list_attr: "dtypes" } attr { - name: "ksize" - type: "list(int)" + name: "capacity" + type: "int" + default_value { + i: 0 + } has_minimum: true - minimum: 5 } attr { - name: "strides" - type: "list(int)" + name: "memory_limit" + type: "int" + default_value { + i: 0 + } has_minimum: true - minimum: 5 } attr { - name: "padding" + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "container" type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } + default_value { + s: "" } } attr { - name: "data_format" + name: "shared_name" type: "string" default_value { - s: "NDHWC" - } - allowed_values { - list { - s: "NDHWC" - s: "NCDHW" - } + s: "" } } + is_stateful: true +} +op { + name: "MapSize" + output_arg { + name: "size" + type: DT_INT32 + } attr { - name: "T" - type: "type" + name: "capacity" + type: "int" default_value { - type: DT_FLOAT + i: 0 } - allowed_values { - list { - type: DT_HALF - type: DT_BFLOAT16 - type: DT_FLOAT - } + has_minimum: true + } + attr { + name: "memory_limit" + type: "int" + default_value { + i: 0 } + has_minimum: true } attr { - name: "TInput" - type: "type" + name: "dtypes" + type: "list(type)" + } + attr { + name: "container" + type: "string" default_value { 
- type: DT_FLOAT + s: "" } - allowed_values { - list { - type: DT_HALF - type: DT_BFLOAT16 - type: DT_FLOAT - } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" } } + is_stateful: true } op { - name: "MaxPool3DGradGrad" + name: "MapStage" input_arg { - name: "orig_input" - type_attr: "T" + name: "key" + type: DT_INT64 } input_arg { - name: "orig_output" - type_attr: "T" + name: "indices" + type: DT_INT32 } input_arg { - name: "grad" - type_attr: "T" - } - output_arg { - name: "output" - type_attr: "T" + name: "values" + type_list_attr: "fake_dtypes" } attr { - name: "ksize" - type: "list(int)" + name: "capacity" + type: "int" + default_value { + i: 0 + } has_minimum: true - minimum: 5 } attr { - name: "strides" - type: "list(int)" + name: "memory_limit" + type: "int" + default_value { + i: 0 + } has_minimum: true - minimum: 5 } attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } - } + name: "dtypes" + type: "list(type)" } attr { - name: "data_format" + name: "fake_dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "container" type: "string" default_value { - s: "NDHWC" - } - allowed_values { - list { - s: "NDHWC" - s: "NCDHW" - } + s: "" } } attr { - name: "T" - type: "type" - allowed_values { - list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT32 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 - type: DT_INT64 - type: DT_BFLOAT16 - type: DT_UINT16 - type: DT_HALF - type: DT_UINT32 - type: DT_UINT64 - } + name: "shared_name" + type: "string" + default_value { + s: "" } } + is_stateful: true } op { - name: "MaxPoolGrad" - input_arg { - name: "orig_input" - type_attr: "T" - } + name: "MapUnstage" input_arg { - name: "orig_output" - type_attr: "T" + name: "key" + type: DT_INT64 } input_arg { - name: "grad" - type_attr: "T" + name: "indices" + type: DT_INT32 } output_arg { - name: "output" - type_attr: "T" + name: "values" + type_list_attr: "dtypes" } 
attr { - name: "ksize" - type: "list(int)" + name: "capacity" + type: "int" + default_value { + i: 0 + } has_minimum: true - minimum: 4 } attr { - name: "strides" - type: "list(int)" + name: "memory_limit" + type: "int" + default_value { + i: 0 + } has_minimum: true - minimum: 4 } attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } - } + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 } attr { - name: "data_format" + name: "container" type: "string" default_value { - s: "NHWC" - } - allowed_values { - list { - s: "NHWC" - s: "NCHW" - } + s: "" } } attr { - name: "T" - type: "type" + name: "shared_name" + type: "string" default_value { - type: DT_FLOAT - } - allowed_values { - list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT32 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 - type: DT_INT64 - type: DT_BFLOAT16 - type: DT_UINT16 - type: DT_HALF - type: DT_UINT32 - type: DT_UINT64 - } + s: "" } } + is_stateful: true } op { - name: "MaxPoolGradGrad" - input_arg { - name: "orig_input" - type_attr: "T" - } + name: "MapUnstageNoKey" input_arg { - name: "orig_output" - type_attr: "T" + name: "indices" + type: DT_INT32 } - input_arg { - name: "grad" - type_attr: "T" + output_arg { + name: "key" + type: DT_INT64 } output_arg { - name: "output" - type_attr: "T" + name: "values" + type_list_attr: "dtypes" } attr { - name: "ksize" - type: "list(int)" + name: "capacity" + type: "int" + default_value { + i: 0 + } has_minimum: true - minimum: 4 } attr { - name: "strides" - type: "list(int)" + name: "memory_limit" + type: "int" + default_value { + i: 0 + } has_minimum: true - minimum: 4 } attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } - } + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 } attr { - name: "data_format" + name: "container" type: "string" default_value { - s: "NHWC" + s: "" } - allowed_values { - list { - s: "NHWC" - s: 
"NCHW" - } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + is_stateful: true +} +op { + name: "MatMul" + input_arg { + name: "a" + type_attr: "T" + } + input_arg { + name: "b" + type_attr: "T" + } + output_arg { + name: "product" + type_attr: "T" + } + attr { + name: "transpose_a" + type: "bool" + default_value { + b: false + } + } + attr { + name: "transpose_b" + type: "bool" + default_value { + b: false } } attr { @@ -17760,284 +17966,377 @@ op { type: "type" allowed_values { list { + type: DT_BFLOAT16 + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE type: DT_INT32 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 type: DT_INT64 - type: DT_BFLOAT16 - type: DT_UINT16 - type: DT_HALF - type: DT_UINT32 - type: DT_UINT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } } op { - name: "MaxPoolGradGradV2" + name: "MatchingFiles" input_arg { - name: "orig_input" - type_attr: "T" + name: "pattern" + type: DT_STRING } - input_arg { - name: "orig_output" - type_attr: "T" + output_arg { + name: "filenames" + type: DT_STRING } +} +op { + name: "MatrixBandPart" input_arg { - name: "grad" + name: "input" type_attr: "T" } input_arg { - name: "ksize" - type: DT_INT32 + name: "num_lower" + type_attr: "Tindex" } input_arg { - name: "strides" - type: DT_INT32 + name: "num_upper" + type_attr: "Tindex" } output_arg { - name: "output" + name: "band" type_attr: "T" } attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } - } + name: "T" + type: "type" } attr { - name: "data_format" - type: "string" + name: "Tindex" + type: "type" default_value { - s: "NHWC" + type: DT_INT64 } allowed_values { list { - s: "NHWC" - s: "NCHW" + type: DT_INT32 + type: DT_INT64 } } } +} +op { + name: "MatrixDeterminant" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } attr { name: "T" type: "type" allowed_values { list { + type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - 
type: DT_INT32 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 - type: DT_INT64 - type: DT_BFLOAT16 - type: DT_UINT16 - type: DT_HALF - type: DT_UINT32 - type: DT_UINT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } } op { - name: "MaxPoolGradGradWithArgmax" + name: "MatrixDiag" input_arg { - name: "input" + name: "diagonal" type_attr: "T" } - input_arg { - name: "grad" + output_arg { + name: "output" type_attr: "T" } + attr { + name: "T" + type: "type" + } +} +op { + name: "MatrixDiagPart" input_arg { - name: "argmax" - type_attr: "Targmax" + name: "input" + type_attr: "T" } output_arg { - name: "output" + name: "diagonal" type_attr: "T" } attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 4 + name: "T" + type: "type" } - attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 4 +} +op { + name: "MatrixExponential" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" } attr { - name: "padding" - type: "string" + name: "T" + type: "type" allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_DOUBLE + type: DT_FLOAT + type: DT_HALF + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } + deprecation { + version: 27 + explanation: "Use Python implementation tf.linalg.matrix_exponential instead." 
+ } +} +op { + name: "MatrixInverse" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "adjoint" + type: "bool" + default_value { + b: false + } + } attr { - name: "Targmax" + name: "T" type: "type" allowed_values { list { - type: DT_INT32 - type: DT_INT64 + type: DT_DOUBLE + type: DT_FLOAT + type: DT_HALF + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } +} +op { + name: "MatrixLogarithm" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "output" + type_attr: "T" + } attr { name: "T" type: "type" allowed_values { list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT32 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 - type: DT_INT64 - type: DT_BFLOAT16 - type: DT_UINT16 - type: DT_HALF - type: DT_UINT32 - type: DT_UINT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } } op { - name: "MaxPoolGradV2" + name: "MatrixSetDiag" input_arg { - name: "orig_input" + name: "input" type_attr: "T" } input_arg { - name: "orig_output" + name: "diagonal" type_attr: "T" } - input_arg { - name: "grad" + output_arg { + name: "output" type_attr: "T" } + attr { + name: "T" + type: "type" + } +} +op { + name: "MatrixSolve" input_arg { - name: "ksize" - type: DT_INT32 + name: "matrix" + type_attr: "T" } input_arg { - name: "strides" - type: DT_INT32 + name: "rhs" + type_attr: "T" } output_arg { name: "output" type_attr: "T" } attr { - name: "padding" - type: "string" - allowed_values { - list { - s: "SAME" - s: "VALID" - } + name: "adjoint" + type: "bool" + default_value { + b: false } } attr { - name: "data_format" - type: "string" - default_value { - s: "NHWC" - } + name: "T" + type: "type" allowed_values { list { - s: "NHWC" - s: "NCHW" + type: DT_DOUBLE + type: DT_FLOAT + type: DT_HALF + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } +} +op { + name: "MatrixSolveLs" + input_arg { + name: "matrix" + type_attr: "T" + } + input_arg { + name: "rhs" + type_attr: "T" + } + input_arg { + 
name: "l2_regularizer" + type: DT_DOUBLE + } + output_arg { + name: "output" + type_attr: "T" + } attr { name: "T" type: "type" - default_value { - type: DT_FLOAT - } allowed_values { list { - type: DT_FLOAT type: DT_DOUBLE - type: DT_INT32 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 - type: DT_INT64 - type: DT_BFLOAT16 - type: DT_UINT16 + type: DT_FLOAT type: DT_HALF - type: DT_UINT32 - type: DT_UINT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } + attr { + name: "fast" + type: "bool" + default_value { + b: true + } + } } op { - name: "MaxPoolGradWithArgmax" + name: "MatrixSquareRoot" input_arg { name: "input" type_attr: "T" } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_DOUBLE + type: DT_FLOAT + type: DT_HALF + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } +} +op { + name: "MatrixTriangularSolve" input_arg { - name: "grad" + name: "matrix" type_attr: "T" } input_arg { - name: "argmax" - type_attr: "Targmax" + name: "rhs" + type_attr: "T" } output_arg { name: "output" type_attr: "T" } attr { - name: "ksize" - type: "list(int)" - has_minimum: true - minimum: 4 + name: "lower" + type: "bool" + default_value { + b: true + } } attr { - name: "strides" - type: "list(int)" - has_minimum: true - minimum: 4 + name: "adjoint" + type: "bool" + default_value { + b: false + } } attr { - name: "padding" - type: "string" + name: "T" + type: "type" allowed_values { list { - s: "SAME" - s: "VALID" + type: DT_DOUBLE + type: DT_FLOAT + type: DT_HALF + type: DT_COMPLEX64 + type: DT_COMPLEX128 } } } +} +op { + name: "Max" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "reduction_indices" + type_attr: "Tidx" + } + output_arg { + name: "output" + type_attr: "T" + } attr { - name: "Targmax" - type: "type" - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } + name: "keep_dims" + type: "bool" + default_value { + b: false } } attr { @@ -18051,30 
+18350,40 @@ op { type: DT_UINT8 type: DT_INT16 type: DT_INT8 + type: DT_COMPLEX64 type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_BFLOAT16 type: DT_UINT16 + type: DT_COMPLEX128 type: DT_HALF type: DT_UINT32 type: DT_UINT64 } } } + attr { + name: "Tidx" + type: "type" + default_value { + type: DT_INT32 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } } op { - name: "MaxPoolV2" + name: "MaxPool" input_arg { name: "input" type_attr: "T" } - input_arg { - name: "ksize" - type: DT_INT32 - } - input_arg { - name: "strides" - type: DT_INT32 - } output_arg { name: "output" type_attr: "T" @@ -18101,6 +18410,18 @@ op { } } } + attr { + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } attr { name: "padding" type: "string" @@ -18127,7 +18448,7 @@ op { } } op { - name: "MaxPoolWithArgmax" + name: "MaxPool3D" input_arg { name: "input" type_attr: "T" @@ -18136,42 +18457,38 @@ op { name: "output" type_attr: "T" } - output_arg { - name: "argmax" - type_attr: "Targmax" - } attr { name: "ksize" type: "list(int)" has_minimum: true - minimum: 4 + minimum: 5 } attr { name: "strides" type: "list(int)" has_minimum: true - minimum: 4 + minimum: 5 } attr { - name: "Targmax" - type: "type" - default_value { - type: DT_INT64 - } + name: "padding" + type: "string" allowed_values { list { - type: DT_INT32 - type: DT_INT64 + s: "SAME" + s: "VALID" } } } attr { - name: "padding" + name: "data_format" type: "string" + default_value { + s: "NDHWC" + } allowed_values { list { - s: "SAME" - s: "VALID" + s: "NDHWC" + s: "NCDHW" } } } @@ -18180,71 +18497,146 @@ op { type: "type" allowed_values { list { - type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT32 - type: DT_UINT8 - type: DT_INT16 - type: DT_INT8 - type: DT_INT64 - type: DT_BFLOAT16 - type: DT_UINT16 type: DT_HALF - type: DT_UINT32 - type: DT_UINT64 + type: DT_BFLOAT16 + type: 
DT_FLOAT } } } } op { - name: "Maximum" + name: "MaxPool3DGrad" input_arg { - name: "x" - type_attr: "T" + name: "orig_input" + type_attr: "TInput" } input_arg { - name: "y" + name: "orig_output" + type_attr: "TInput" + } + input_arg { + name: "grad" type_attr: "T" } output_arg { - name: "z" + name: "output" type_attr: "T" } attr { - name: "T" - type: "type" - allowed_values { + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 5 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 5 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NDHWC" + } + allowed_values { + list { + s: "NDHWC" + s: "NCDHW" + } + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { list { + type: DT_HALF type: DT_BFLOAT16 + type: DT_FLOAT + } + } + } + attr { + name: "TInput" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { type: DT_HALF + type: DT_BFLOAT16 type: DT_FLOAT - type: DT_DOUBLE - type: DT_INT32 - type: DT_INT64 } } } - is_commutative: true } op { - name: "Mean" + name: "MaxPool3DGradGrad" input_arg { - name: "input" + name: "orig_input" type_attr: "T" } input_arg { - name: "reduction_indices" - type_attr: "Tidx" + name: "orig_output" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" } output_arg { name: "output" type_attr: "T" } attr { - name: "keep_dims" - type: "bool" + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 5 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 5 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" default_value { - b: false + s: "NDHWC" + } + allowed_values { + list { + s: "NDHWC" + s: "NCDHW" + } } } attr { @@ -18258,159 
+18650,144 @@ op { type: DT_UINT8 type: DT_INT16 type: DT_INT8 - type: DT_COMPLEX64 type: DT_INT64 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 type: DT_BFLOAT16 type: DT_UINT16 - type: DT_COMPLEX128 type: DT_HALF type: DT_UINT32 type: DT_UINT64 } } } - attr { - name: "Tidx" - type: "type" - default_value { - type: DT_INT32 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } - } } op { - name: "Merge" + name: "MaxPoolGrad" input_arg { - name: "inputs" + name: "orig_input" type_attr: "T" - number_attr: "N" } - output_arg { - name: "output" + input_arg { + name: "orig_output" type_attr: "T" } - output_arg { - name: "value_index" - type: DT_INT32 - } - attr { - name: "T" - type: "type" - } - attr { - name: "N" - type: "int" - has_minimum: true - minimum: 1 - } -} -op { - name: "MergeSummary" input_arg { - name: "inputs" - type: DT_STRING - number_attr: "N" + name: "grad" + type_attr: "T" } output_arg { - name: "summary" - type: DT_STRING + name: "output" + type_attr: "T" } attr { - name: "N" - type: "int" + name: "ksize" + type: "list(int)" has_minimum: true - minimum: 1 - } -} -op { - name: "MergeV2Checkpoints" - input_arg { - name: "checkpoint_prefixes" - type: DT_STRING - } - input_arg { - name: "destination_prefix" - type: DT_STRING + minimum: 4 } attr { - name: "delete_old_dirs" - type: "bool" - default_value { - b: true - } - } - is_stateful: true -} -op { - name: "Mfcc" - input_arg { - name: "spectrogram" - type: DT_FLOAT - } - input_arg { - name: "sample_rate" - type: DT_INT32 - } - output_arg { - name: "output" - type: DT_FLOAT + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 } attr { - name: "upper_frequency_limit" - type: "float" - default_value { - f: 4000 + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } } } attr { - name: "lower_frequency_limit" - type: "float" + name: "data_format" + type: "string" default_value { - f: 20 + s: "NHWC" } - } - attr { - name: 
"filterbank_channel_count" - type: "int" - default_value { - i: 40 + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } } } attr { - name: "dct_coefficient_count" - type: "int" + name: "T" + type: "type" default_value { - i: 13 + type: DT_FLOAT + } + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } } } } op { - name: "Min" + name: "MaxPoolGradGrad" input_arg { - name: "input" + name: "orig_input" type_attr: "T" } input_arg { - name: "reduction_indices" - type_attr: "Tidx" + name: "orig_output" + type_attr: "T" + } + input_arg { + name: "grad" + type_attr: "T" } output_arg { name: "output" type_attr: "T" } attr { - name: "keep_dims" - type: "bool" + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" default_value { - b: false + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } } } attr { @@ -18424,88 +18801,129 @@ op { type: DT_UINT8 type: DT_INT16 type: DT_INT8 - type: DT_COMPLEX64 type: DT_INT64 - type: DT_QINT8 - type: DT_QUINT8 - type: DT_QINT32 type: DT_BFLOAT16 type: DT_UINT16 - type: DT_COMPLEX128 type: DT_HALF type: DT_UINT32 type: DT_UINT64 } } } - attr { - name: "Tidx" - type: "type" - default_value { - type: DT_INT32 - } - allowed_values { - list { - type: DT_INT32 - type: DT_INT64 - } - } - } } op { - name: "Minimum" + name: "MaxPoolGradGradV2" input_arg { - name: "x" + name: "orig_input" type_attr: "T" } input_arg { - name: "y" + name: "orig_output" type_attr: "T" } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "ksize" + type: DT_INT32 + } + input_arg { + name: 
"strides" + type: DT_INT32 + } output_arg { - name: "z" + name: "output" type_attr: "T" } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + } + } + } attr { name: "T" type: "type" allowed_values { list { - type: DT_BFLOAT16 - type: DT_HALF type: DT_FLOAT type: DT_DOUBLE type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } - is_commutative: true } op { - name: "MirrorPad" + name: "MaxPoolGradGradWithArgmax" input_arg { name: "input" type_attr: "T" } input_arg { - name: "paddings" - type_attr: "Tpaddings" + name: "grad" + type_attr: "T" + } + input_arg { + name: "argmax" + type_attr: "Targmax" } output_arg { name: "output" type_attr: "T" } attr { - name: "T" - type: "type" + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 } attr { - name: "Tpaddings" - type: "type" - default_value { - type: DT_INT32 + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } } + } + attr { + name: "Targmax" + type: "type" allowed_values { list { type: DT_INT32 @@ -18514,113 +18932,307 @@ op { } } attr { - name: "mode" - type: "string" + name: "T" + type: "type" allowed_values { list { - s: "REFLECT" - s: "SYMMETRIC" + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "MirrorPadGrad" + name: "MaxPoolGradV2" input_arg { - name: "input" + name: "orig_input" type_attr: "T" } input_arg { - name: "paddings" - type_attr: "Tpaddings" + name: "orig_output" + type_attr: "T" 
+ } + input_arg { + name: "grad" + type_attr: "T" + } + input_arg { + name: "ksize" + type: DT_INT32 + } + input_arg { + name: "strides" + type: DT_INT32 } output_arg { name: "output" type_attr: "T" } attr { - name: "T" - type: "type" + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } } attr { - name: "Tpaddings" - type: "type" + name: "data_format" + type: "string" default_value { - type: DT_INT32 + s: "NHWC" } allowed_values { list { - type: DT_INT32 - type: DT_INT64 + s: "NHWC" + s: "NCHW" } } } attr { - name: "mode" - type: "string" + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } allowed_values { list { - s: "REFLECT" - s: "SYMMETRIC" + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } } } op { - name: "Mod" + name: "MaxPoolGradWithArgmax" input_arg { - name: "x" + name: "input" type_attr: "T" } input_arg { - name: "y" + name: "grad" type_attr: "T" } + input_arg { + name: "argmax" + type_attr: "Targmax" + } output_arg { - name: "z" + name: "output" type_attr: "T" } attr { - name: "T" - type: "type" + name: "ksize" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "strides" + type: "list(int)" + has_minimum: true + minimum: 4 + } + attr { + name: "padding" + type: "string" allowed_values { list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "Targmax" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } +} +op { + name: "MaxPoolV2" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + 
name: "ksize" + type: DT_INT32 + } + input_arg { + name: "strides" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { type: DT_HALF type: DT_BFLOAT16 type: DT_FLOAT type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_UINT16 + type: DT_QINT8 + } + } + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "data_format" + type: "string" + default_value { + s: "NHWC" + } + allowed_values { + list { + s: "NHWC" + s: "NCHW" + s: "NCHW_VECT_C" } } } } op { - name: "ModelDataset" + name: "MaxPoolWithArgmax" input_arg { - name: "input_dataset" - type: DT_VARIANT + name: "input" + type_attr: "T" } output_arg { - name: "handle" - type: DT_VARIANT + name: "output" + type_attr: "T" + } + output_arg { + name: "argmax" + type_attr: "Targmax" } attr { - name: "output_types" - type: "list(type)" + name: "ksize" + type: "list(int)" has_minimum: true - minimum: 1 + minimum: 4 } attr { - name: "output_shapes" - type: "list(shape)" + name: "strides" + type: "list(int)" has_minimum: true - minimum: 1 + minimum: 4 + } + attr { + name: "Targmax" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "padding" + type: "string" + allowed_values { + list { + s: "SAME" + s: "VALID" + } + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } } } op { - name: "Mul" + name: "Maximum" input_arg { name: "x" type_attr: "T" @@ -18642,173 +19254,198 @@ op { type: DT_HALF type: DT_FLOAT type: DT_DOUBLE - type: DT_UINT8 - type: 
DT_INT8 - type: DT_UINT16 - type: DT_INT16 type: DT_INT32 type: DT_INT64 - type: DT_COMPLEX64 - type: DT_COMPLEX128 } } } is_commutative: true } op { - name: "MultiDeviceIterator" - output_arg { - name: "handle" - type: DT_RESOURCE - } - attr { - name: "devices" - type: "list(string)" - has_minimum: true - minimum: 1 - } - attr { - name: "shared_name" - type: "string" - } - attr { - name: "container" - type: "string" - } - attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 - } - attr { - name: "output_shapes" - type: "list(shape)" - has_minimum: true - minimum: 1 + name: "Mean" + input_arg { + name: "input" + type_attr: "T" } - is_stateful: true -} -op { - name: "MultiDeviceIteratorFromStringHandle" input_arg { - name: "string_handle" - type: DT_STRING + name: "reduction_indices" + type_attr: "Tidx" } output_arg { - name: "multi_device_iterator" - type: DT_RESOURCE + name: "output" + type_attr: "T" } attr { - name: "output_types" - type: "list(type)" + name: "keep_dims" + type: "bool" default_value { + b: false + } + } + attr { + name: "T" + type: "type" + allowed_values { list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 } } - has_minimum: true } attr { - name: "output_shapes" - type: "list(shape)" + name: "Tidx" + type: "type" default_value { + type: DT_INT32 + } + allowed_values { list { + type: DT_INT32 + type: DT_INT64 } } - has_minimum: true } - is_stateful: true } op { - name: "MultiDeviceIteratorGetNextFromShard" - input_arg { - name: "multi_device_iterator" - type: DT_RESOURCE - } + name: "Merge" input_arg { - name: "shard_num" - type: DT_INT32 + name: "inputs" + type_attr: "T" + number_attr: "N" } - input_arg { - name: "incarnation_id" - type: DT_INT64 + output_arg 
{ + name: "output" + type_attr: "T" } output_arg { - name: "components" - type_list_attr: "output_types" + name: "value_index" + type: DT_INT32 } attr { - name: "output_types" - type: "list(type)" - has_minimum: true - minimum: 1 + name: "T" + type: "type" } attr { - name: "output_shapes" - type: "list(shape)" + name: "N" + type: "int" has_minimum: true minimum: 1 } - is_stateful: true } op { - name: "MultiDeviceIteratorInit" - input_arg { - name: "dataset" - type: DT_VARIANT - } - input_arg { - name: "multi_device_iterator" - type: DT_RESOURCE - } + name: "MergeSummary" input_arg { - name: "max_buffer_size" - type: DT_INT64 + name: "inputs" + type: DT_STRING + number_attr: "N" } output_arg { - name: "incarnation_id" - type: DT_INT64 + name: "summary" + type: DT_STRING + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 } - is_stateful: true } op { - name: "MultiDeviceIteratorToStringHandle" + name: "MergeV2Checkpoints" input_arg { - name: "multi_device_iterator" - type: DT_RESOURCE + name: "checkpoint_prefixes" + type: DT_STRING } - output_arg { - name: "string_handle" + input_arg { + name: "destination_prefix" type: DT_STRING } + attr { + name: "delete_old_dirs" + type: "bool" + default_value { + b: true + } + } is_stateful: true } op { - name: "Multinomial" + name: "Mfcc" input_arg { - name: "logits" - type_attr: "T" + name: "spectrogram" + type: DT_FLOAT } input_arg { - name: "num_samples" + name: "sample_rate" type: DT_INT32 } output_arg { name: "output" - type_attr: "output_dtype" + type: DT_FLOAT } attr { - name: "seed" + name: "upper_frequency_limit" + type: "float" + default_value { + f: 4000 + } + } + attr { + name: "lower_frequency_limit" + type: "float" + default_value { + f: 20 + } + } + attr { + name: "filterbank_channel_count" type: "int" default_value { - i: 0 + i: 40 } } attr { - name: "seed2" + name: "dct_coefficient_count" type: "int" default_value { - i: 0 + i: 13 + } + } +} +op { + name: "Min" + input_arg { + name: "input" + 
type_attr: "T" + } + input_arg { + name: "reduction_indices" + type_attr: "Tidx" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "keep_dims" + type: "bool" + default_value { + b: false } } attr { @@ -18822,9 +19459,14 @@ op { type: DT_UINT8 type: DT_INT16 type: DT_INT8 + type: DT_COMPLEX64 type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 type: DT_BFLOAT16 type: DT_UINT16 + type: DT_COMPLEX128 type: DT_HALF type: DT_UINT32 type: DT_UINT64 @@ -18832,10 +19474,10 @@ op { } } attr { - name: "output_dtype" + name: "Tidx" type: "type" default_value { - type: DT_INT64 + type: DT_INT32 } allowed_values { list { @@ -18844,102 +19486,495 @@ op { } } } - is_stateful: true } op { - name: "MutableDenseHashTable" + name: "Minimum" input_arg { - name: "empty_key" - type_attr: "key_dtype" + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" } output_arg { - name: "table_handle" - type: DT_STRING - is_ref: true + name: "z" + type_attr: "T" } attr { - name: "container" - type: "string" - default_value { - s: "" + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_INT64 + } } } - attr { - name: "shared_name" - type: "string" - default_value { - s: "" - } + is_commutative: true +} +op { + name: "MirrorPad" + input_arg { + name: "input" + type_attr: "T" } - attr { - name: "use_node_name_sharing" - type: "bool" - default_value { - b: false - } + input_arg { + name: "paddings" + type_attr: "Tpaddings" } - attr { - name: "key_dtype" - type: "type" + output_arg { + name: "output" + type_attr: "T" } attr { - name: "value_dtype" + name: "T" type: "type" } attr { - name: "value_shape" - type: "shape" + name: "Tpaddings" + type: "type" default_value { - shape { - } + type: DT_INT32 } - } - attr { - name: "initial_num_buckets" - type: "int" - default_value { - i: 131072 + allowed_values { + list { + type: DT_INT32 + type: 
DT_INT64 + } } } attr { - name: "max_load_factor" - type: "float" - default_value { - f: 0.8 + name: "mode" + type: "string" + allowed_values { + list { + s: "REFLECT" + s: "SYMMETRIC" + } } } - is_stateful: true } op { - name: "MutableDenseHashTableV2" + name: "MirrorPadGrad" input_arg { - name: "empty_key" - type_attr: "key_dtype" + name: "input" + type_attr: "T" } input_arg { - name: "deleted_key" - type_attr: "key_dtype" + name: "paddings" + type_attr: "Tpaddings" } output_arg { - name: "table_handle" - type: DT_RESOURCE + name: "output" + type_attr: "T" } attr { - name: "container" - type: "string" - default_value { - s: "" - } + name: "T" + type: "type" } attr { - name: "shared_name" - type: "string" + name: "Tpaddings" + type: "type" default_value { - s: "" + type: DT_INT32 } - } - attr { - name: "use_node_name_sharing" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + attr { + name: "mode" + type: "string" + allowed_values { + list { + s: "REFLECT" + s: "SYMMETRIC" + } + } + } +} +op { + name: "Mod" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "z" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + type: DT_HALF + type: DT_HALF + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} +op { + name: "ModelDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + output_arg { + name: "handle" + type: DT_VARIANT + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } +} +op { + name: "Mul" + input_arg { + name: "x" + type_attr: "T" + } + input_arg { + name: "y" + type_attr: "T" + } + output_arg { + name: "z" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_HALF + type: DT_FLOAT 
+ type: DT_DOUBLE + type: DT_UINT8 + type: DT_INT8 + type: DT_UINT16 + type: DT_INT16 + type: DT_INT32 + type: DT_INT64 + type: DT_COMPLEX64 + type: DT_COMPLEX128 + } + } + } + is_commutative: true +} +op { + name: "MultiDeviceIterator" + output_arg { + name: "handle" + type: DT_RESOURCE + } + attr { + name: "devices" + type: "list(string)" + has_minimum: true + minimum: 1 + } + attr { + name: "shared_name" + type: "string" + } + attr { + name: "container" + type: "string" + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "MultiDeviceIteratorFromStringHandle" + input_arg { + name: "string_handle" + type: DT_STRING + } + output_arg { + name: "multi_device_iterator" + type: DT_RESOURCE + } + attr { + name: "output_types" + type: "list(type)" + default_value { + list { + } + } + has_minimum: true + } + attr { + name: "output_shapes" + type: "list(shape)" + default_value { + list { + } + } + has_minimum: true + } + is_stateful: true +} +op { + name: "MultiDeviceIteratorGetNextFromShard" + input_arg { + name: "multi_device_iterator" + type: DT_RESOURCE + } + input_arg { + name: "shard_num" + type: DT_INT32 + } + input_arg { + name: "incarnation_id" + type: DT_INT64 + } + output_arg { + name: "components" + type_list_attr: "output_types" + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} +op { + name: "MultiDeviceIteratorInit" + input_arg { + name: "dataset" + type: DT_VARIANT + } + input_arg { + name: "multi_device_iterator" + type: DT_RESOURCE + } + input_arg { + name: "max_buffer_size" + type: DT_INT64 + } + output_arg { + name: "incarnation_id" + type: DT_INT64 + } + is_stateful: true +} +op { + name: "MultiDeviceIteratorToStringHandle" + 
input_arg { + name: "multi_device_iterator" + type: DT_RESOURCE + } + output_arg { + name: "string_handle" + type: DT_STRING + } + is_stateful: true +} +op { + name: "Multinomial" + input_arg { + name: "logits" + type_attr: "T" + } + input_arg { + name: "num_samples" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "output_dtype" + } + attr { + name: "seed" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "seed2" + type: "int" + default_value { + i: 0 + } + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_INT64 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + } + } + } + attr { + name: "output_dtype" + type: "type" + default_value { + type: DT_INT64 + } + allowed_values { + list { + type: DT_INT32 + type: DT_INT64 + } + } + } + is_stateful: true +} +op { + name: "MutableDenseHashTable" + input_arg { + name: "empty_key" + type_attr: "key_dtype" + } + output_arg { + name: "table_handle" + type: DT_STRING + is_ref: true + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "use_node_name_sharing" + type: "bool" + default_value { + b: false + } + } + attr { + name: "key_dtype" + type: "type" + } + attr { + name: "value_dtype" + type: "type" + } + attr { + name: "value_shape" + type: "shape" + default_value { + shape { + } + } + } + attr { + name: "initial_num_buckets" + type: "int" + default_value { + i: 131072 + } + } + attr { + name: "max_load_factor" + type: "float" + default_value { + f: 0.8 + } + } + is_stateful: true +} +op { + name: "MutableDenseHashTableV2" + input_arg { + name: "empty_key" + type_attr: "key_dtype" + } + input_arg { + name: "deleted_key" + type_attr: "key_dtype" + } + output_arg { + name: "table_handle" + type: 
DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "use_node_name_sharing" type: "bool" default_value { b: false @@ -20228,6 +21263,80 @@ op { } is_stateful: true } +op { + name: "OutfeedDequeue" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "OutfeedDequeueTuple" + output_arg { + name: "outputs" + type_list_attr: "dtypes" + } + attr { + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "shapes" + type: "list(shape)" + } + attr { + name: "device_ordinal" + type: "int" + default_value { + i: -1 + } + } + is_stateful: true +} +op { + name: "OutfeedEnqueue" + input_arg { + name: "input" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + is_stateful: true +} +op { + name: "OutfeedEnqueueTuple" + input_arg { + name: "inputs" + type_list_attr: "dtypes" + } + attr { + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 + } + is_stateful: true +} op { name: "Pack" input_arg { @@ -26022,6 +27131,25 @@ op { } is_stateful: true } +op { + name: "RecvTPUEmbeddingActivations" + output_arg { + name: "outputs" + type: DT_FLOAT + number_attr: "num_outputs" + } + attr { + name: "num_outputs" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "config" + type: "string" + } + is_stateful: true +} op { name: "ReduceDataset" input_arg { @@ -29689,6 +30817,690 @@ op { } is_stateful: true } +op { + name: "RetrieveTPUEmbeddingADAMParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "momenta" + type: DT_FLOAT + } + output_arg { + name: "velocities" + type: DT_FLOAT + } + attr { + name: "table_id" + type: 
"int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingADAMParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "momenta" + type: DT_FLOAT + } + output_arg { + name: "velocities" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingAdadeltaParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "updates" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "updates" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: 
"int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingAdagradParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingAdagradParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingCenteredRMSPropParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "ms" + type: DT_FLOAT + } + output_arg { + name: "mom" + type: DT_FLOAT + } + output_arg { + name: "mg" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingFTRLParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "linears" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: 
"table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingFTRLParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "linears" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingMDLAdagradLightParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "weights" + type: DT_FLOAT + } + output_arg { + name: "benefits" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingMomentumParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "momenta" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingMomentumParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg 
{ + name: "momenta" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingProximalAdagradParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "accumulators" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingRMSPropParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "ms" + type: DT_FLOAT + } + output_arg { + name: "mom" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + 
name: "RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug" + output_arg { + name: "parameters" + type: DT_FLOAT + } + output_arg { + name: "ms" + type: DT_FLOAT + } + output_arg { + name: "mom" + type: DT_FLOAT + } + output_arg { + name: "gradient_accumulators" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} +op { + name: "RetrieveTPUEmbeddingStochasticGradientDescentParameters" + output_arg { + name: "parameters" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + default_value { + i: -1 + } + has_minimum: true + minimum: -1 + } + attr { + name: "table_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "num_shards" + type: "int" + } + attr { + name: "shard_id" + type: "int" + } + is_stateful: true +} op { name: "Reverse" input_arg { @@ -31714,6 +33526,38 @@ op { } } } +op { + name: "SendTPUEmbeddingGradients" + input_arg { + name: "inputs" + type: DT_FLOAT + number_attr: "N" + } + input_arg { + name: "learning_rates" + type: DT_FLOAT + number_attr: "NN" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "NN" + type: "int" + default_value { + i: 0 + } + has_minimum: true + } + attr { + name: "config" + type: "string" + } + is_stateful: true +} op { name: "SerializeIterator" input_arg { @@ -32064,6 +33908,10 @@ op { minimum: 1 } } +op { + name: "ShutdownDistributedTPU" + is_stateful: true +} op { name: "Sigmoid" input_arg { @@ -37311,6 +39159,285 @@ op { } is_stateful: true } +op { + name: "TPUCompilationResult" + output_arg { + name: "output" + type: DT_STRING + } +} +op { + name: "TPUEmbeddingActivations" + input_arg { + name: "embedding_variable" + type: DT_FLOAT + } + input_arg { + name: "sliced_activations" 
+ type: DT_FLOAT + } + output_arg { + name: "output" + type: DT_FLOAT + } + attr { + name: "table_id" + type: "int" + has_minimum: true + } + attr { + name: "lookup_id" + type: "int" + has_minimum: true + } +} +op { + name: "TPUOrdinalSelector" + output_arg { + name: "device_ordinals" + type: DT_INT32 + } + is_stateful: true +} +op { + name: "TPUPartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + input_arg { + name: "device_ordinal" + type: DT_INT32 + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } +} +op { + name: "TPUReplicate" + input_arg { + name: "inputs" + type_list_attr: "Tinputs" + } + input_arg { + name: "broadcast_inputs" + type_list_attr: "Tbroadcast_inputs" + } + input_arg { + name: "variables" + type: DT_RESOURCE + number_attr: "NumVariables" + } + input_arg { + name: "guaranteed_constants" + type_list_attr: "Tguaranteed_constants" + } + output_arg { + name: "outputs" + type_list_attr: "output_types" + } + attr { + name: "computation" + type: "func" + } + attr { + name: "num_replicas" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "num_cores_per_replica" + type: "int" + default_value { + i: 1 + } + } + attr { + name: "topology" + type: "string" + default_value { + s: "" + } + } + attr { + name: "use_tpu" + type: "bool" + default_value { + b: true + } + } + attr { + name: "device_assignment" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "host_compute_core" + type: "list(string)" + default_value { + list { + } + } + } + attr { + name: "Tinputs" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tbroadcast_inputs" + type: "list(type)" + has_minimum: true + } + attr { + name: "NumVariables" + type: "int" + has_minimum: true + } + attr { + name: "Tguaranteed_constants" + type: 
"list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + } + attr { + name: "padding_map" + type: "list(string)" + default_value { + list { + } + } + } + is_stateful: true +} +op { + name: "TPUReplicateMetadata" + attr { + name: "num_replicas" + type: "int" + has_minimum: true + } + attr { + name: "num_cores_per_replica" + type: "int" + default_value { + i: 1 + } + } + attr { + name: "topology" + type: "string" + default_value { + s: "" + } + } + attr { + name: "use_tpu" + type: "bool" + default_value { + b: true + } + } + attr { + name: "device_assignment" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "computation_shape" + type: "list(int)" + default_value { + list { + } + } + } + attr { + name: "host_compute_core" + type: "list(string)" + default_value { + list { + } + } + } + attr { + name: "padding_map" + type: "list(string)" + default_value { + list { + } + } + } +} +op { + name: "TPUReplicatedInput" + input_arg { + name: "inputs" + type_attr: "T" + number_attr: "N" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "T" + type: "type" + } +} +op { + name: "TPUReplicatedOutput" + input_arg { + name: "input" + type_attr: "T" + } + output_arg { + name: "outputs" + type_attr: "T" + number_attr: "num_replicas" + } + attr { + name: "num_replicas" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "T" + type: "type" + } +} op { name: "TakeDataset" input_arg { @@ -40936,6 +43063,18 @@ op { minimum: 1 } } +op { + name: "WorkerHeartbeat" + input_arg { + name: "request" + type: DT_STRING + } + output_arg { + name: "response" + type: DT_STRING + } + is_stateful: true +} op { name: "WrapDatasetVariant" input_arg { -- GitLab From a1c23dd80f28e60b466ae98883b46eb0ca6273f5 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Wed, 13 Feb 2019 18:29:04 -0800 Subject: [PATCH 107/351] Fixing 
a bug that would result in dropping options when cloning a dataset. PiperOrigin-RevId: 233871591 --- tensorflow/python/distribute/input_lib.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py index 1957fc4287..b6adad05d3 100644 --- a/tensorflow/python/distribute/input_lib.py +++ b/tensorflow/python/distribute/input_lib.py @@ -255,6 +255,7 @@ class DatasetIterator(InputIteratorImpl): cloned_dataset = dataset if not context.executing_eagerly(): cloned_dataset = input_ops._clone_dataset(dataset) # pylint: disable=protected-access + cloned_dataset = cloned_dataset.with_options(dataset.options()) iterator = _SingleWorkerDatasetIterator(cloned_dataset, worker, worker_devices) iterators.append(iterator) @@ -352,7 +353,8 @@ def _get_batched_dataset(d): if isinstance(d, (dataset_ops.BatchDataset, batching._MapAndBatchDataset)): return d - elif isinstance(d, dataset_ops.PrefetchDataset): + elif isinstance(d, (dataset_ops.PrefetchDataset, + dataset_ops._OptionsDataset)): return _get_batched_dataset(d._input_dataset) raise ValueError( -- GitLab From 95fd0fa9f2eed0beda9923f27d37270e0b5d9353 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Wed, 13 Feb 2019 18:42:52 -0800 Subject: [PATCH 108/351] tfdbg: de-flake and re-enable a test in source_remote_test For some reason, the debug grpc server startup is sometimes delayed in Python3. Adding polling of test server to fix that. 
PiperOrigin-RevId: 233873150 --- tensorflow/python/debug/lib/source_remote_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/debug/lib/source_remote_test.py b/tensorflow/python/debug/lib/source_remote_test.py index fe0323692d..dce400c9ab 100644 --- a/tensorflow/python/debug/lib/source_remote_test.py +++ b/tensorflow/python/debug/lib/source_remote_test.py @@ -48,7 +48,8 @@ class SendTracebacksTest(test_util.TensorFlowTestCase): test_util.TensorFlowTestCase.setUpClass() (cls._server_port, cls._debug_server_url, cls._server_dump_dir, cls._server_thread, - cls._server) = grpc_debug_test_server.start_server_on_separate_thread() + cls._server) = grpc_debug_test_server.start_server_on_separate_thread( + poll_server=True) cls._server_address = "localhost:%d" % cls._server_port (cls._server_port_2, cls._debug_server_url_2, cls._server_dump_dir_2, cls._server_thread_2, @@ -201,8 +202,7 @@ class SendTracebacksTest(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): self._server.query_source_file_line(tf_trace_file_path, 0) - # TODO(b/124381153): Re-enable this test when not flaky. - def DISABLED_testSendEagerTracebacksToSingleDebugServer(self): + def testSendEagerTracebacksToSingleDebugServer(self): this_func_name = "testSendEagerTracebacksToSingleDebugServer" send_traceback = traceback.extract_stack() send_lineno = line_number_above() -- GitLab From 2afb07be92c2aca611d8878062629e050cf2acd5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 18:46:12 -0800 Subject: [PATCH 109/351] Go: Update generated wrapper functions for TensorFlow ops. 
PiperOrigin-RevId: 233873495 --- tensorflow/go/op/wrappers.go | 37291 ++++++++++++++++++--------------- 1 file changed, 19981 insertions(+), 17310 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 5e6dba04a7..602c92a351 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -619,139 +619,6 @@ func ScatterNd(scope *Scope, indices tf.Output, updates tf.Output, shape tf.Outp return op.Output(0) } -// QuantizeAndDequantizeV2Attr is an optional argument to QuantizeAndDequantizeV2. -type QuantizeAndDequantizeV2Attr func(optionalAttr) - -// QuantizeAndDequantizeV2SignedInput sets the optional signed_input attribute to value. -// -// value: Whether the quantization is signed or unsigned. (actually this parameter should -// have been called `signed_output`) -// If not specified, defaults to true -func QuantizeAndDequantizeV2SignedInput(value bool) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeV2NumBits sets the optional num_bits attribute to value. -// -// value: The bitwidth of the quantization. -// If not specified, defaults to 8 -func QuantizeAndDequantizeV2NumBits(value int64) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// QuantizeAndDequantizeV2RangeGiven sets the optional range_given attribute to value. -// -// value: Whether the range is given or should be determined from the `input` tensor. -// If not specified, defaults to false -func QuantizeAndDequantizeV2RangeGiven(value bool) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["range_given"] = value - } -} - -// QuantizeAndDequantizeV2RoundMode sets the optional round_mode attribute to value. -// -// value: The 'round_mode' attribute controls which rounding tie-breaking algorithm is -// used when rounding float values to their quantized equivalents. 
The following -// rounding modes are currently supported: -// -// * HALF_TO_EVEN: this is the default round_mode. -// * HALF_UP: round towards positive. In this mode 7.5 rounds up to 8 and -7.5 -// rounds up to -7. -// -// If not specified, defaults to "HALF_TO_EVEN" -func QuantizeAndDequantizeV2RoundMode(value string) QuantizeAndDequantizeV2Attr { - return func(m optionalAttr) { - m["round_mode"] = value - } -} - -// Quantizes then dequantizes a tensor. -// -// This op simulates the precision loss from the quantized forward pass by: -// -// 1. Quantizing the tensor to fixed point numbers, which should match the target -// quantization method when it is used in inference. -// 2. Dequantizing it back to floating point numbers for the following ops, most -// likely matmul. -// -// There are different ways to quantize. This version uses only scaling, so 0.0 -// maps to 0. -// -// From the specified 'num_bits' in the quantized output type, it determines -// minimum and maximum representable quantized values. -// -// e.g. -// -// * [-128, 127] for signed, num_bits = 8, or -// * [0, 255] for unsigned, num_bits = 8. -// -// If range_given == False, the initial input_min, input_max will be determined -// automatically as the minimum and maximum values in the input tensor, otherwise -// the specified values of input_min, input_max are used. -// -// Note: If the input_min, input_max are specified, they do not need to equal the -// actual minimum and maximum values in the tensor. e.g. in some cases it may be -// beneficial to specify these values such that the low probability extremes of the -// input distribution are clipped. -// -// This op determines the maximum scale_factor that would map the initial -// [input_min, input_max] range to a range that lies within the representable -// quantized range. -// -// It determines the scale from one of input_min and input_max, then updates the -// other one to maximize the respresentable range. -// -// e.g. 
-// -// * if the output is signed, num_bits = 8, [input_min, input_max] = [-10.0, -// 5.0]: it would use a scale_factor of -128 / -10.0 = 12.8 In this case, it -// would update input_max to be 127 / 12.8 = 9.921875 -// * if the output is signed, num_bits = 8, [input_min, input_max] = [-10.0, -// 10.0]: it would use a scale_factor of 127 / 10.0 = 12.7 In this case, it -// would update input_min to be 128.0 / 12.7 = -10.07874 -// * if the output is unsigned, input_min is forced to be 0, and only the -// specified input_max is used. -// -// After determining the scale_factor and updating the input range, it applies the -// following to each value in the 'input' tensor. -// -// output = round(clamp(value, input_min, input_max) * scale_factor) / scale_factor. -// -// The above round function rounds the value based on the given round_mode. -// -// -// Arguments: -// input: Tensor to quantize and then dequantize. -// input_min: If `range_given == True`, this specifies the minimum input value that needs to -// be represented, otherwise it is determined from the min value of the `input` -// tensor. -// input_max: If `range_given == True`, this specifies the maximum input value that needs to -// be represented, otherwise it is determined from the max value of the `input` -// tensor. -func QuantizeAndDequantizeV2(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, optional ...QuantizeAndDequantizeV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizeAndDequantizeV2", - Input: []tf.Input{ - input, input_min, input_max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Bitcasts a tensor from one type to another without copying data. 
// // Given a tensor `input`, this operation returns a tensor that has the same buffer @@ -782,49 +649,6 @@ func Bitcast(scope *Scope, input tf.Output, type_ tf.DataType) (output tf.Output return op.Output(0) } -// Extract `patches` from `images` and put them in the "depth" output dimension. -// -// Arguments: -// images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`. -// ksizes: The size of the sliding window for each dimension of `images`. -// strides: 1-D of length 4. How far the centers of two consecutive patches are in -// the images. Must be: `[1, stride_rows, stride_cols, 1]`. -// rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the -// input stride, specifying how far two consecutive patch samples are in the -// input. Equivalent to extracting patches with -// `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by -// subsampling them spatially by a factor of `rates`. This is equivalent to -// `rate` in dilated (a.k.a. Atrous) convolutions. -// padding: The type of padding algorithm to use. -// -// We specify the size-related attributes as: -// -// ```python -// ksizes = [1, ksize_rows, ksize_cols, 1] -// strides = [1, strides_rows, strides_cols, 1] -// rates = [1, rates_rows, rates_cols, 1] -// ``` -// -// Returns 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows * -// ksize_cols * depth]` containing image patches with size -// `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note -// `out_rows` and `out_cols` are the dimensions of the output patches. 
-func ExtractImagePatches(scope *Scope, images tf.Output, ksizes []int64, strides []int64, rates []int64, padding string) (patches tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "rates": rates, "padding": padding} - opspec := tf.OpSpec{ - Type: "ExtractImagePatches", - Input: []tf.Input{ - images, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // SpaceToDepthAttr is an optional argument to SpaceToDepth. type SpaceToDepthAttr func(optionalAttr) @@ -1195,65 +1019,6 @@ func SpaceToBatchND(scope *Scope, input tf.Output, block_shape tf.Output, paddin return op.Output(0) } -// ListDiffAttr is an optional argument to ListDiff. -type ListDiffAttr func(optionalAttr) - -// ListDiffOutIdx sets the optional out_idx attribute to value. -// If not specified, defaults to DT_INT32 -func ListDiffOutIdx(value tf.DataType) ListDiffAttr { - return func(m optionalAttr) { - m["out_idx"] = value - } -} - -// Computes the difference between two lists of numbers or strings. -// -// Given a list `x` and a list `y`, this operation returns a list `out` that -// represents all values that are in `x` but not in `y`. The returned list `out` -// is sorted in the same order that the numbers appear in `x` (duplicates are -// preserved). This operation also returns a list `idx` that represents the -// position of each `out` element in `x`. In other words: -// -// `out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]` -// -// For example, given this input: -// -// ``` -// x = [1, 2, 3, 4, 5, 6] -// y = [1, 3, 5] -// ``` -// -// This operation would return: -// -// ``` -// out ==> [2, 4, 6] -// idx ==> [1, 3, 5] -// ``` -// -// Arguments: -// x: 1-D. Values to keep. -// y: 1-D. Values to remove. -// -// Returns 1-D. Values present in `x` but not in `y`.1-D. Positions of `x` values preserved in `out`. 
-func ListDiff(scope *Scope, x tf.Output, y tf.Output, optional ...ListDiffAttr) (out tf.Output, idx tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ListDiff", - Input: []tf.Input{ - x, y, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - // Inserts a dimension of 1 into a tensor's shape. // // Given a tensor `input`, this operation inserts a dimension of 1 at the @@ -1603,78 +1368,6 @@ func Tile(scope *Scope, input tf.Output, multiples tf.Output) (output tf.Output) return op.Output(0) } -// StridedSliceGradAttr is an optional argument to StridedSliceGrad. -type StridedSliceGradAttr func(optionalAttr) - -// StridedSliceGradBeginMask sets the optional begin_mask attribute to value. -// If not specified, defaults to 0 -func StridedSliceGradBeginMask(value int64) StridedSliceGradAttr { - return func(m optionalAttr) { - m["begin_mask"] = value - } -} - -// StridedSliceGradEndMask sets the optional end_mask attribute to value. -// If not specified, defaults to 0 -func StridedSliceGradEndMask(value int64) StridedSliceGradAttr { - return func(m optionalAttr) { - m["end_mask"] = value - } -} - -// StridedSliceGradEllipsisMask sets the optional ellipsis_mask attribute to value. -// If not specified, defaults to 0 -func StridedSliceGradEllipsisMask(value int64) StridedSliceGradAttr { - return func(m optionalAttr) { - m["ellipsis_mask"] = value - } -} - -// StridedSliceGradNewAxisMask sets the optional new_axis_mask attribute to value. -// If not specified, defaults to 0 -func StridedSliceGradNewAxisMask(value int64) StridedSliceGradAttr { - return func(m optionalAttr) { - m["new_axis_mask"] = value - } -} - -// StridedSliceGradShrinkAxisMask sets the optional shrink_axis_mask attribute to value. 
-// If not specified, defaults to 0 -func StridedSliceGradShrinkAxisMask(value int64) StridedSliceGradAttr { - return func(m optionalAttr) { - m["shrink_axis_mask"] = value - } -} - -// Returns the gradient of `StridedSlice`. -// -// Since `StridedSlice` cuts out pieces of its `input` which is size -// `shape`, its gradient will have the same shape (which is passed here -// as `shape`). The gradient will be zero in any element that the slice -// does not select. -// -// Arguments are the same as StridedSliceGrad with the exception that -// `dy` is the input gradient to be propagated and `shape` is the -// shape of `StridedSlice`'s `input`. -func StridedSliceGrad(scope *Scope, shape tf.Output, begin tf.Output, end tf.Output, strides tf.Output, dy tf.Output, optional ...StridedSliceGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StridedSliceGrad", - Input: []tf.Input{ - shape, begin, end, strides, dy, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // StridedSliceAttr is an optional argument to StridedSlice. type StridedSliceAttr func(optionalAttr) @@ -1868,37 +1561,6 @@ func StridedSlice(scope *Scope, input tf.Output, begin tf.Output, end tf.Output, return op.Output(0) } -// Return a slice from 'input'. -// -// The output tensor is a tensor with dimensions described by 'size' -// whose values are extracted from 'input' starting at the offsets in -// 'begin'. -// -// *Requirements*: -// 0 <= begin[i] <= begin[i] + size[i] <= Di for i in [0, n) -// -// Arguments: -// -// begin: begin[i] specifies the offset into the 'i'th dimension of -// 'input' to slice from. -// size: size[i] specifies the number of elements of the 'i'th dimension -// of 'input' to slice. If size[i] is -1, all remaining elements in dimension -// i are included in the slice (i.e. 
this is equivalent to setting -// size[i] = input.dim_size(i) - begin[i]). -func Slice(scope *Scope, input tf.Output, begin tf.Output, size tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Slice", - Input: []tf.Input{ - input, begin, size, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // SizeAttr is an optional argument to Size. type SizeAttr func(optionalAttr) @@ -3290,30 +2952,6 @@ func InplaceSub(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Outpu return op.Output(0) } -// Updates specified rows with values in `v`. -// -// Computes `x[i, :] = v; return x`. -// -// Arguments: -// x: A tensor of type `T`. -// i: A vector. Indices into the left-most dimension of `x`. -// v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size. -// -// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`. -func InplaceUpdate(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "InplaceUpdate", - Input: []tf.Input{ - x, i, v, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Makes a copy of `x`. // // Arguments: @@ -4434,81 +4072,6 @@ func UniformCandidateSampler(scope *Scope, true_classes tf.Output, num_true int6 return op.Output(0), op.Output(1), op.Output(2) } -// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping. -type GenerateVocabRemappingAttr func(optionalAttr) - -// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value. -// -// value: Number of entries in the old vocab file to consider. If -1, -// use the entire old vocabulary. 
-// If not specified, defaults to -1 -// -// REQUIRES: value >= -1 -func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr { - return func(m optionalAttr) { - m["old_vocab_size"] = value - } -} - -// Given a path to new and old vocabulary files, returns a remapping Tensor of -// -// length `num_new_vocab`, where `remapping[i]` contains the row number in the old -// vocabulary that corresponds to row `i` in the new vocabulary (starting at line -// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i` -// in the new vocabulary is not in the old vocabulary. The old vocabulary is -// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the -// default value of -1. -// -// `num_vocab_offset` enables -// use in the partitioned variable case, and should generally be set through -// examining partitioning info. The format of the files should be a text file, -// with each line containing a single entity within the vocabulary. -// -// For example, with `new_vocab_file` a text file containing each of the following -// elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3], -// `num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be -// `[0, -1, 2]`. -// -// The op also returns a count of how many entries in the new vocabulary -// were present in the old vocabulary, which is used to calculate the number of -// values to initialize in a weight matrix remapping -// -// This functionality can be used to remap both row vocabularies (typically, -// features) and column vocabularies (typically, classes) from TensorFlow -// checkpoints. Note that the partitioning logic relies on contiguous vocabularies -// corresponding to div-partitioned variables. 
Moreover, the underlying remapping -// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should -// use the corresponding index_table_from_file() as the FeatureColumn framework -// does (as opposed to tf.feature_to_id(), which uses a CuckooTable). -// -// Arguments: -// new_vocab_file: Path to the new vocab file. -// old_vocab_file: Path to the old vocab file. -// new_vocab_offset: How many entries into the new vocab file to start reading. -// num_new_vocab: Number of entries in the new vocab file to remap. -// -// Returns A Tensor of length num_new_vocab where the element at index i -// is equal to the old ID that maps to the new ID i. This element is -1 for any -// new ID that is not found in the old vocabulary.Number of new vocab entries found in old vocab. -func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "GenerateVocabRemapping", - Input: []tf.Input{ - new_vocab_file, old_vocab_file, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - // Broadcasts a tensor value to one or more other devices. func CollectiveBcastSend(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) { if scope.Err() != nil { @@ -6053,77 +5616,6 @@ func MapSize(scope *Scope, dtypes []tf.DataType, optional ...MapSizeAttr) (size return op.Output(0) } -// MapUnstageAttr is an optional argument to MapUnstage. -type MapUnstageAttr func(optionalAttr) - -// MapUnstageCapacity sets the optional capacity attribute to value. 
-// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageCapacity(value int64) MapUnstageAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapUnstageMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageMemoryLimit(value int64) MapUnstageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapUnstageContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapUnstageContainer(value string) MapUnstageAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapUnstageSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func MapUnstageSharedName(value string) MapUnstageAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes and returns the values associated with the key -// -// from the underlying container. If the underlying container -// does not contain this key, the op will block until it does. -func MapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageAttr) (values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapUnstage", - Input: []tf.Input{ - key, indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapUnstage", err) - return - } - return values -} - // MapPeekAttr is an optional argument to MapPeek. 
type MapPeekAttr func(optionalAttr) @@ -6692,170 +6184,92 @@ func TensorArrayConcatV2(scope *Scope, handle tf.Output, flow_in tf.Output, dtyp return op.Output(0), op.Output(1) } -// EditDistanceAttr is an optional argument to EditDistance. -type EditDistanceAttr func(optionalAttr) - -// EditDistanceNormalize sets the optional normalize attribute to value. -// -// value: boolean (if true, edit distances are normalized by length of truth). -// -// The output is: -// If not specified, defaults to true -func EditDistanceNormalize(value bool) EditDistanceAttr { - return func(m optionalAttr) { - m["normalize"] = value - } -} - -// Computes the (possibly normalized) Levenshtein Edit Distance. -// -// The inputs are variable-length sequences provided by SparseTensors -// (hypothesis_indices, hypothesis_values, hypothesis_shape) -// and -// (truth_indices, truth_values, truth_shape). -// -// The inputs are: -// -// Arguments: -// hypothesis_indices: The indices of the hypothesis list SparseTensor. -// This is an N x R int64 matrix. -// hypothesis_values: The values of the hypothesis list SparseTensor. -// This is an N-length vector. -// hypothesis_shape: The shape of the hypothesis list SparseTensor. -// This is an R-length vector. -// truth_indices: The indices of the truth list SparseTensor. -// This is an M x R int64 matrix. -// truth_values: The values of the truth list SparseTensor. -// This is an M-length vector. -// truth_shape: truth indices, vector. -// -// Returns A dense float tensor with rank R - 1. 
-// -// For the example input: -// -// // hypothesis represents a 2x1 matrix with variable-length values: -// // (0,0) = ["a"] -// // (1,0) = ["b"] -// hypothesis_indices = [[0, 0, 0], -// [1, 0, 0]] -// hypothesis_values = ["a", "b"] -// hypothesis_shape = [2, 1, 1] -// -// // truth represents a 2x2 matrix with variable-length values: -// // (0,0) = [] -// // (0,1) = ["a"] -// // (1,0) = ["b", "c"] -// // (1,1) = ["a"] -// truth_indices = [[0, 1, 0], -// [1, 0, 0], -// [1, 0, 1], -// [1, 1, 0]] -// truth_values = ["a", "b", "c", "a"] -// truth_shape = [2, 2, 2] -// normalize = true -// -// The output will be: +// Deprecated. Use TensorArrayGradV3 // -// // output is a 2x2 matrix with edit distances normalized by truth lengths. -// output = [[inf, 1.0], // (0,0): no truth, (0,1): no hypothesis -// [0.5, 1.0]] // (1,0): addition, (1,1): no hypothesis -func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) { +// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3 +func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "EditDistance", + Type: "TensorArrayWriteV2", Input: []tf.Input{ - hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape, + handle, index, value, flow_in, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns 0 if x == 0, and x * log(y) otherwise, elementwise. -func Xlogy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Deprecated. 
Use TensorArrayGradV3 +// +// DEPRECATED at GraphDef version 26: Use TensorArrayGradV3 +func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"source": source} opspec := tf.OpSpec{ - Type: "Xlogy", + Type: "TensorArrayGradV2", Input: []tf.Input{ - x, y, + handle, flow_in, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput. -type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr) +// TensorArrayV2Attr is an optional argument to TensorArrayV2. +type TensorArrayV2Attr func(optionalAttr) -// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr { +// TensorArrayV2ElementShape sets the optional element_shape attribute to value. +// If not specified, defaults to +func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr { return func(m optionalAttr) { - m["data_format"] = value + m["element_shape"] = value } } -// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value. -// -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. 
Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr { +// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value. +// If not specified, defaults to false +func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr { return func(m optionalAttr) { - m["dilations"] = value + m["dynamic_size"] = value } } -// Computes the gradients of depthwise convolution with respect to the input. -// -// Arguments: -// input_sizes: An integer vector representing the shape of `input`, based -// on `data_format`. For example, if `data_format` is 'NHWC' then -// `input` is a 4-D `[batch, height, width, channels]` tensor. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, depthwise_multiplier]`. -// out_backprop: 4-D with shape based on `data_format`. -// For example, if `data_format` is 'NHWC' then -// out_backprop shape is `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. -// padding: The type of padding algorithm to use. +// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value. +// If not specified, defaults to true +func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr { + return func(m optionalAttr) { + m["clear_after_read"] = value + } +} + +// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value. +// If not specified, defaults to "" +func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr { + return func(m optionalAttr) { + m["tensor_array_name"] = value + } +} + +// Deprecated. Use TensorArrayV3 // -// Returns 4-D with shape according to `data_format`. For example, if -// `data_format` is 'NHWC', output shape is `[batch, in_height, -// in_width, in_channels]`. 
Gradient w.r.t. the input of the -// convolution. -func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) { +// DEPRECATED at GraphDef version 26: Use TensorArrayV3 +func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNativeBackpropInput", + Type: "TensorArrayV2", Input: []tf.Input{ - input_sizes, filter, out_backprop, + size, }, Attrs: attrs, } @@ -6863,112 +6277,82 @@ func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, fil return op.Output(0) } -// Returns x / y element-wise. +// Split the data from the input value into TensorArray elements. // -// *NOTE*: `Div` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Div", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Selects the k nearest centers for each point. +// Assuming that `lengths` takes on values // -// Rows of points are assumed to be input points. Rows of centers are assumed to be -// the list of candidate centers. For each point, the k centers that have least L2 -// distance to it are computed. +// ```(n0, n1, ..., n(T-1))``` // -// Arguments: -// points: Matrix of shape (n, d). Rows are assumed to be input points. -// centers: Matrix of shape (m, d). Rows are assumed to be centers. -// k: Number of nearest centers to return for each point. 
If k is larger than m, then -// only m centers are returned. +// and that `value` has shape // -// Returns Matrix of shape (n, min(m, k)). Each row contains the indices of the centers -// closest to the corresponding point, ordered by increasing distance.Matrix of shape (n, min(m, k)). Each row contains the squared L2 distance to the -// corresponding center in nearest_center_indices. -func NearestNeighbors(scope *Scope, points tf.Output, centers tf.Output, k tf.Output) (nearest_center_indices tf.Output, nearest_center_distances tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "NearestNeighbors", - Input: []tf.Input{ - points, centers, k, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Returns x * y element-wise. +// ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```, // -// *NOTE*: `Multiply` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// this splits values into a TensorArray with T tensors. +// +// TensorArray index t will be the subtensor of values with starting position +// +// ```(n0 + n1 + ... + n(t-1), 0, 0, ...)``` +// +// and having size +// +// ```nt x d0 x d1 x ...``` +// +// Arguments: +// handle: The handle to a TensorArray. +// value: The concatenated tensor to write to the TensorArray. +// lengths: The vector of lengths, how to split the rows of value into the +// TensorArray. +// flow_in: A float scalar that enforces proper chaining of operations. +// +// Returns A float scalar that enforces proper chaining of operations. 
+func TensorArraySplitV3(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Mul", + Type: "TensorArraySplitV3", Input: []tf.Input{ - x, y, + handle, value, lengths, flow_in, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// BiasAddAttr is an optional argument to BiasAdd. -type BiasAddAttr func(optionalAttr) +// EmptyAttr is an optional argument to Empty. +type EmptyAttr func(optionalAttr) -// BiasAddDataFormat sets the optional data_format attribute to value. +// EmptyInit sets the optional init attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the bias tensor will be added to the last dimension -// of the value tensor. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// The tensor will be added to "in_channels", the third-to-the-last -// dimension. -// If not specified, defaults to "NHWC" -func BiasAddDataFormat(value string) BiasAddAttr { +// value: If True, initialize the returned tensor with the default value of dtype. Otherwise, the implementation is free not to initializethe tensor's content. +// If not specified, defaults to false +func EmptyInit(value bool) EmptyAttr { return func(m optionalAttr) { - m["data_format"] = value + m["init"] = value } } -// Adds `bias` to `value`. +// Creates a tensor with the given shape. // -// This is a special case of `tf.add` where `bias` is restricted to be 1-D. -// Broadcasting is supported, so `value` may have any number of dimensions. +// This operation creates a tensor of `shape` and `dtype`. // // Arguments: -// value: Any number of dimensions. -// bias: 1-D with size the last dimension of `value`. +// shape: 1-D. Represents the shape of the output tensor. // -// Returns Broadcasted sum of `value` and `bias`. 
-func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) { +// +// Returns A `Tensor` of type `T`. +func Empty(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...EmptyAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "BiasAdd", + Type: "Empty", Input: []tf.Input{ - value, bias, + shape, }, Attrs: attrs, } @@ -6976,269 +6360,360 @@ func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddA return op.Output(0) } -// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse. -type SparseReduceSumSparseAttr func(optionalAttr) +// TensorArrayConcatV3Attr is an optional argument to TensorArrayConcatV3. +type TensorArrayConcatV3Attr func(optionalAttr) -// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value. +// TensorArrayConcatV3ElementShapeExcept0 sets the optional element_shape_except0 attribute to value. // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr { +// value: The expected shape of an element, if known, +// excluding the first dimension. Used to validate the shapes of +// TensorArray elements. If this shape is not fully specified, concatenating +// zero-size TensorArrays is an error. +// If not specified, defaults to +func TensorArrayConcatV3ElementShapeExcept0(value tf.Shape) TensorArrayConcatV3Attr { return func(m optionalAttr) { - m["keep_dims"] = value + m["element_shape_except0"] = value } } -// Computes the sum of elements across dimensions of a SparseTensor. -// -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_sum()`. In contrast to SparseReduceSum, this Op returns a -// SparseTensor. 
+// Concat the elements from the TensorArray into value `value`. // -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. +// Takes `T` elements of shapes // -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python. +// ``` +// (n0 x d0 x d1 x ...), (n1 x d0 x d1 x ...), ..., (n(T-1) x d0 x d1 x ...) +// ``` +// +// and concatenates them into a Tensor of shape: +// +// ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)``` +// +// All elements must have the same shape (excepting the first dimension). // // Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. -func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// handle: The handle to a TensorArray. +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. +// +// Returns All of the elements in the TensorArray, concatenated along the first +// axis.A vector of the row sizes of the original T elements in the +// value output. In the example above, this would be the values: +// `(n1, n2, ..., n(T-1))`. 
+func TensorArrayConcatV3(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV3Attr) (value tf.Output, lengths tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SparseReduceSumSparse", + Type: "TensorArrayConcatV3", Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, + handle, flow_in, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0), op.Output(1) } -// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. -type AllCandidateSamplerAttr func(optionalAttr) - -// AllCandidateSamplerSeed sets the optional seed attribute to value. +// Scatter the data from the input value into specific TensorArray elements. // -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value +// `indices` must be a vector, its length must match the first dim of `value`. +// +// Arguments: +// handle: The handle to a TensorArray. +// indices: The locations at which to write the tensor elements. +// value: The concatenated tensor to write to the TensorArray. +// flow_in: A float scalar that enforces proper chaining of operations. +// +// Returns A float scalar that enforces proper chaining of operations. 
+func TensorArrayScatterV3(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorArrayScatterV3", + Input: []tf.Input{ + handle, indices, value, flow_in, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. +type TensorArrayGatherV3Attr func(optionalAttr) + +// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr { +// value: The expected shape of an element, if known. Used to +// validate the shapes of TensorArray elements. If this shape is not +// fully specified, gathering zero-size TensorArrays is an error. +// If not specified, defaults to +func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { return func(m optionalAttr) { - m["seed2"] = value + m["element_shape"] = value } } -// Generates labels for candidate sampling with a learned unigram distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. +// Gather specific elements from the TensorArray into output `value`. // -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. +// All elements selected by `indices` must have the same shape. 
// // Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to produce. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. +// handle: The handle to a TensorArray. +// indices: The locations in the TensorArray from which to read tensor elements. +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. // -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { +// Returns All of the elements in the TensorArray, concatenated along a new +// axis (the new dimension 0). 
+func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AllCandidateSampler", + Type: "TensorArrayGatherV3", Input: []tf.Input{ - true_classes, + handle, indices, flow_in, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Returns x + y element-wise. -// -// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AddV2", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) return op.Output(0) } -// Returns an element-wise indication of the sign of a number. +// Creates a TensorArray for storing multiple gradients of values in the given handle. // -// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`. +// Similar to TensorArrayGradV3. However it creates an accumulator with an +// expanded shape compared to the input TensorArray whose gradient is being +// computed. This enables multiple gradients for the same TensorArray to be +// calculated using the same accumulator. // -// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`. -func Sign(scope *Scope, x tf.Output) (y tf.Output) { +// Arguments: +// handle: The handle to the forward TensorArray. +// flow_in: A float scalar that enforces proper chaining of operations. +// shape_to_prepend: An int32 vector representing a shape. 
Elements in the gradient accumulator will +// have shape which is this shape_to_prepend value concatenated with shape of the +// elements in the TensorArray corresponding to the input handle. +// source: The gradient source string, used to decide which gradient TensorArray +// to return. +func TensorArrayGradWithShape(scope *Scope, handle tf.Output, flow_in tf.Output, shape_to_prepend tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"source": source} opspec := tf.OpSpec{ - Type: "Sign", + Type: "TensorArrayGradWithShape", Input: []tf.Input{ - x, + handle, flow_in, shape_to_prepend, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Creates a dataset that passes a sliding window over `input_dataset`. +// Creates a TensorArray for storing the gradients of values in the given handle. // -// Arguments: +// If the given TensorArray gradient already exists, returns a reference to it. // -// window_size: A scalar representing the number of elements in the -// sliding window. -// window_shift: A scalar representing the steps moving the sliding window -// forward in one iteration. It must be positive. -// window_stride: A scalar representing the stride of the input elements of the sliding window. -// It must be positive. +// Locks the size of the original TensorArray by disabling its dynamic size flag. // +// **A note about the input flow_in:** // -func ExperimentalSlidingWindowDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, window_shift tf.Output, window_stride tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// The handle flow_in forces the execution of the gradient lookup to occur +// only after certain other operations have occurred. For example, when +// the forward TensorArray is dynamically sized, writes to this TensorArray +// may resize the object. 
The gradient TensorArray is statically sized based +// on the size of the forward TensorArray when this operation executes. +// Furthermore, the size of the forward TensorArray is frozen by this call. +// As a result, the flow is used to ensure that the call to generate the gradient +// TensorArray only happens after all writes are executed. +// +// In the case of dynamically sized TensorArrays, gradient computation should +// only be performed on read operations that have themselves been chained via +// flow to occur only after all writes have executed. That way the final size +// of the forward TensorArray is known when this operation is called. +// +// **A note about the source attribute:** +// +// TensorArray gradient calls use an accumulator TensorArray object. If +// multiple gradients are calculated and run in the same session, the multiple +// gradient nodes may accidentally flow through the same accumulator TensorArray. +// This double counts and generally breaks the TensorArray gradient flow. +// +// The solution is to identify which gradient call this particular +// TensorArray gradient is being called in. This is performed by identifying +// a unique string (e.g. "gradients", "gradients_1", ...) from the input +// gradient Tensor's name. This string is used as a suffix when creating +// the TensorArray gradient object here (the attribute `source`). +// +// The attribute `source` is added as a suffix to the forward TensorArray's +// name when performing the creation / lookup, so that each separate gradient +// calculation gets its own TensorArray accumulator. +// +// Arguments: +// handle: The handle to the forward TensorArray. +// flow_in: A float scalar that enforces proper chaining of operations. +// source: The gradient source string, used to decide which gradient TensorArray +// to return. 
+func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"source": source} opspec := tf.OpSpec{ - Type: "ExperimentalSlidingWindowDataset", + Type: "TensorArrayGradV3", Input: []tf.Input{ - input_dataset, window_size, window_shift, window_stride, + handle, flow_in, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Returns which elements of x are finite. +// Pop the element at the top of the stack. // -// @compatibility(numpy) -// Equivalent to np.isfinite -// @end_compatibility -func IsFinite(scope *Scope, x tf.Output) (y tf.Output) { +// Arguments: +// handle: The handle to a stack. +// elem_type: The type of the elem that is popped. +// +// Returns The tensor that is popped from the top of the stack. +func StackPopV2(scope *Scope, handle tf.Output, elem_type tf.DataType) (elem tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"elem_type": elem_type} opspec := tf.OpSpec{ - Type: "IsFinite", + Type: "StackPopV2", Input: []tf.Input{ - x, + handle, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign. -type ResourceStridedSliceAssignAttr func(optionalAttr) - -// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["begin_mask"] = value - } -} - -// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value. 
-// If not specified, defaults to 0 -func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["end_mask"] = value - } -} - -// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["ellipsis_mask"] = value - } -} - -// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr { - return func(m optionalAttr) { - m["new_axis_mask"] = value - } -} +// OneHotAttr is an optional argument to OneHot. +type OneHotAttr func(optionalAttr) -// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value. -// If not specified, defaults to 0 -func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr { +// OneHotAxis sets the optional axis attribute to value. +// +// value: The axis to fill (default: -1, a new inner-most axis). +// If not specified, defaults to -1 +func OneHotAxis(value int64) OneHotAttr { return func(m optionalAttr) { - m["shrink_axis_mask"] = value + m["axis"] = value } } -// Assign `value` to the sliced l-value reference of `ref`. +// Returns a one-hot tensor. // -// The values of `value` are assigned to the positions in the variable -// `ref` that are selected by the slice parameters. The slice parameters -// `begin, `end`, `strides`, etc. work exactly as in `StridedSlice`. +// The locations represented by indices in `indices` take value `on_value`, +// while all other locations take value `off_value`. // -// NOTE this op currently does not support broadcasting and so `value`'s -// shape must be exactly the shape produced by the slice of `ref`. 
+// If the input `indices` is rank `N`, the output will have rank `N+1`, +// The new axis is created at dimension `axis` (default: the new axis is +// appended at the end). // -// Returns the created operation. -func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) { +// If `indices` is a scalar the output shape will be a vector of length `depth`. +// +// If `indices` is a vector of length `features`, the output shape will be: +// ``` +// features x depth if axis == -1 +// depth x features if axis == 0 +// ``` +// +// If `indices` is a matrix (batch) with shape `[batch, features]`, +// the output shape will be: +// ``` +// batch x features x depth if axis == -1 +// batch x depth x features if axis == 1 +// depth x batch x features if axis == 0 +// ``` +// +// +// Examples +// ========= +// +// Suppose that +// ``` +// indices = [0, 2, -1, 1] +// depth = 3 +// on_value = 5.0 +// off_value = 0.0 +// axis = -1 +// ``` +// +// Then output is `[4 x 3]`: +// ``` +// output = +// [5.0 0.0 0.0] // one_hot(0) +// [0.0 0.0 5.0] // one_hot(2) +// [0.0 0.0 0.0] // one_hot(-1) +// [0.0 5.0 0.0] // one_hot(1) +// ``` +// +// Suppose that +// ``` +// indices = [0, 2, -1, 1] +// depth = 3 +// on_value = 0.0 +// off_value = 3.0 +// axis = 0 +// ``` +// +// Then output is `[3 x 4]`: +// ``` +// output = +// [0.0 3.0 3.0 3.0] +// [3.0 3.0 3.0 0.0] +// [3.0 3.0 3.0 3.0] +// [3.0 0.0 3.0 3.0] +// // ^ one_hot(0) +// // ^ one_hot(2) +// // ^ one_hot(-1) +// // ^ one_hot(1) +// ``` +// +// Suppose that +// ``` +// indices = [[0, 2], [1, -1]] +// depth = 3 +// on_value = 1.0 +// off_value = 0.0 +// axis = -1 +// ``` +// +// Then output is `[2 x 2 x 3]`: +// ``` +// output = +// [ +// [1.0, 0.0, 0.0] // one_hot(0) +// [0.0, 0.0, 1.0] // one_hot(2) +// ][ +// [0.0, 1.0, 0.0] // one_hot(1) +// [0.0, 0.0, 0.0] // one_hot(-1) +// ] +// ``` +// +// Arguments: 
+// indices: A tensor of indices. +// depth: A scalar defining the depth of the one hot dimension. +// on_value: A scalar defining the value to fill in output when `indices[j] = i`. +// off_value: A scalar defining the value to fill in output when `indices[j] != i`. +// +// Returns The one-hot tensor. +func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -7247,163 +6722,148 @@ func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, en a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceStridedSliceAssign", + Type: "OneHot", Input: []tf.Input{ - ref, begin, end, strides, value, + indices, depth, on_value, off_value, }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// ArgMaxAttr is an optional argument to ArgMax. -type ArgMaxAttr func(optionalAttr) - -// ArgMaxOutputType sets the optional output_type attribute to value. -// If not specified, defaults to DT_INT64 -func ArgMaxOutputType(value tf.DataType) ArgMaxAttr { - return func(m optionalAttr) { - m["output_type"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Returns the index with the largest value across dimensions of a tensor. -// -// Note that in case of ties the identity of the return value is not guaranteed. +// Computes the number of elements in the given queue. // // Arguments: +// handle: The handle to a queue. // -// dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. -// Describes which dimension of the input Tensor to reduce across. For vectors, -// use dimension = 0. -func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) { +// Returns The number of elements in the given queue. 
+func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ArgMax", + Type: "QueueSizeV2", Input: []tf.Input{ - input, dimension, + handle, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// PreventGradientAttr is an optional argument to PreventGradient. -type PreventGradientAttr func(optionalAttr) +// QueueDequeueManyV2Attr is an optional argument to QueueDequeueManyV2. +type QueueDequeueManyV2Attr func(optionalAttr) -// PreventGradientMessage sets the optional message attribute to value. +// QueueDequeueManyV2TimeoutMs sets the optional timeout_ms attribute to value. // -// value: Will be printed in the error when anyone tries to differentiate -// this operation. -// If not specified, defaults to "" -func PreventGradientMessage(value string) PreventGradientAttr { +// value: If the queue has fewer than n elements, this operation +// will block for up to timeout_ms milliseconds. +// Note: This option is not supported yet. +// If not specified, defaults to -1 +func QueueDequeueManyV2TimeoutMs(value int64) QueueDequeueManyV2Attr { return func(m optionalAttr) { - m["message"] = value + m["timeout_ms"] = value } } -// An identity op that triggers an error if a gradient is requested. +// Dequeues `n` tuples of one or more tensors from the given queue. // -// When executed in a graph, this op outputs its input tensor as-is. +// If the queue is closed and there are fewer than `n` elements, then an +// OutOfRange error is returned. // -// When building ops to compute gradients, the TensorFlow gradient system -// will return an error when trying to lookup the gradient of this op, -// because no gradient must ever be registered for this function. This -// op exists to prevent subtle bugs from silently returning unimplemented -// gradients in some corner cases. 
+// This operation concatenates queue-element component tensors along the +// 0th dimension to make a single component tensor. All of the components +// in the dequeued tuple will have size `n` in the 0th dimension. +// +// This operation has `k` outputs, where `k` is the number of components in +// the tuples stored in the given queue, and output `i` is the ith +// component of the dequeued tuple. +// +// N.B. If the queue is empty, this operation will block until `n` elements +// have been dequeued (or 'timeout_ms' elapses, if specified). // // Arguments: -// input: any tensor. +// handle: The handle to a queue. +// n: The number of tuples to dequeue. +// component_types: The type of each component in a tuple. // -// Returns the same input tensor. -func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) { +// Returns One or more tensors that were dequeued as a tuple. +func QueueDequeueManyV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueManyV2Attr) (components []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"component_types": component_types} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "PreventGradient", + Type: "QueueDequeueManyV2", Input: []tf.Input{ - input, + handle, n, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes asin of x element-wise. -func Asin(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "Asin", - Input: []tf.Input{ - x, - }, + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("QueueDequeueManyV2", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + return components } -// SparseToDenseAttr is an optional argument to SparseToDense. 
-type SparseToDenseAttr func(optionalAttr) +// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize. +type QuantizeAndDequantizeAttr func(optionalAttr) -// SparseToDenseValidateIndices sets the optional validate_indices attribute to value. -// -// value: If true, indices are checked to make sure they are sorted in -// lexicographic order and that there are no repeats. +// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value. // If not specified, defaults to true -func SparseToDenseValidateIndices(value bool) SparseToDenseAttr { +func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr { return func(m optionalAttr) { - m["validate_indices"] = value + m["signed_input"] = value } } -// Converts a sparse representation into a dense tensor. -// -// Builds an array `dense` with shape `output_shape` such that -// -// ``` -// # If sparse_indices is scalar -// dense[i] = (i == sparse_indices ? sparse_values : default_value) -// -// # If sparse_indices is a vector, then for each i -// dense[sparse_indices[i]] = sparse_values[i] -// -// # If sparse_indices is an n by d matrix, then for each i in [0, n) -// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i] -// ``` -// -// All other values in `dense` are set to `default_value`. If `sparse_values` is a -// scalar, all sparse indices are set to this single value. -// -// Indices should be sorted in lexicographic order, and indices must not -// contain any repeats. If `validate_indices` is true, these properties -// are checked during execution. -// -// Arguments: -// sparse_indices: 0-D, 1-D, or 2-D. `sparse_indices[i]` contains the complete -// index where `sparse_values[i]` will be placed. -// output_shape: 1-D. Shape of the dense output tensor. -// sparse_values: 1-D. Values corresponding to each row of `sparse_indices`, -// or a scalar value to be used for all sparse indices. 
-// default_value: Scalar value to set for indices not specified in -// `sparse_indices`. +// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value. +// If not specified, defaults to 8 +func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["num_bits"] = value + } +} + +// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value. +// If not specified, defaults to false +func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["range_given"] = value + } +} + +// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value. +// If not specified, defaults to 0 +func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["input_min"] = value + } +} + +// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value. +// If not specified, defaults to 0 +func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr { + return func(m optionalAttr) { + m["input_max"] = value + } +} + +// Use QuantizeAndDequantizeV2 instead. // -// Returns Dense output tensor of shape `output_shape`. 
-func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) { +// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2 +func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -7412,9 +6872,9 @@ func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Outpu a(attrs) } opspec := tf.OpSpec{ - Type: "SparseToDense", + Type: "QuantizeAndDequantize", Input: []tf.Input{ - sparse_indices, output_shape, sparse_values, default_value, + input, }, Attrs: attrs, } @@ -7422,195 +6882,254 @@ func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Outpu return op.Output(0) } -// Computes the sum along sparse segments of a tensor. -// -// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is -// misisng, the `output` tensor at that position will be zeroed. +// Returns locations of nonzero / true values in a tensor. // -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/sparse#Segmentation) -// for an explanation of segments. +// This operation returns the coordinates of true elements in `condition`. The +// coordinates are returned in a 2-D tensor where the first dimension (rows) +// represents the number of true elements, and the second dimension (columns) +// represents the coordinates of the true elements. Keep in mind, the shape of +// the output tensor can vary depending on how many true values there are in +// `condition`. Indices are output in row-major order. 
// // For example: // -// ```python -// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) -// -// tf.sparse_segment_sum_with_num_segments( -// c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3) -// # => [[0 0 0 0] -// # [0 0 0 0] -// # [0 0 0 0]] -// -// tf.sparse_segment_sum_with_num_segments(c, -// tf.constant([0, 1]), -// tf.constant([0, 2], -// num_segments=4)) -// # => [[ 1 2 3 4] -// # [ 0 0 0 0] -// # [-1 -2 -3 -4] -// # [ 0 0 0 0]] // ``` +// # 'input' tensor is [[True, False] +// # [True, False]] +// # 'input' has two true values, so output has two coordinates. +// # 'input' has rank of 2, so coordinates have two indices. +// where(input) ==> [[0, 0], +// [1, 0]] // -// Arguments: +// # `condition` tensor is [[[True, False] +// # [True, False]] +// # [[False, True] +// # [False, True]] +// # [[False, False] +// # [False, True]]] +// # 'input' has 5 true values, so output has 5 coordinates. +// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// num_segments: Should equal the number of distinct segment IDs. +// # `condition` tensor is [[[1.5, 0.0] +// # [-0.5, 0.0]] +// # [[0.0, 0.25] +// # [0.0, 0.75]] +// # [[0.0, 0.0] +// # [0.0, 0.01]]] +// # 'input' has 5 nonzero values, so output has 5 coordinates. +// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] // -// Returns Has same shape as data, except for dimension 0 which -// has size `num_segments`. 
-func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { +// # `condition` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] +// # [0.0 + 0.5j, 0.0 + 0.0j]] +// # [[0.0 + 0.0j, 0.25 + 1.5j] +// # [0.0 + 0.0j, 0.75 + 0.0j]] +// # [[0.0 + 0.0j, 0.0 + 0.0j] +// # [0.0 + 0.0j, 0.01 + 0.0j]]] +// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates. +// # 'input' has rank of 3, so coordinates have three indices. +// where(input) ==> [[0, 0, 0], +// [0, 1, 0], +// [1, 0, 1], +// [1, 1, 1], +// [2, 1, 1]] +// ``` +func Where(scope *Scope, condition tf.Output) (index tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseSegmentSumWithNumSegments", + Type: "Where", Input: []tf.Input{ - data, indices, segment_ids, num_segments, + condition, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the determinant of one or more square matrices. +// QueueDequeueV2Attr is an optional argument to QueueDequeueV2. +type QueueDequeueV2Attr func(optionalAttr) + +// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value. // -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor containing the determinants -// for all input submatrices `[..., :, :]`. +// value: If the queue is empty, this operation will block for up to +// timeout_ms milliseconds. +// Note: This option is not supported yet. +// If not specified, defaults to -1 +func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr { + return func(m optionalAttr) { + m["timeout_ms"] = value + } +} + +// Dequeues a tuple of one or more tensors from the given queue. +// +// This operation has k outputs, where k is the number of components +// in the tuples stored in the given queue, and output i is the ith +// component of the dequeued tuple. +// +// N.B. 
If the queue is empty, this operation will block until an element +// has been dequeued (or 'timeout_ms' elapses, if specified). // // Arguments: -// input: Shape is `[..., M, M]`. +// handle: The handle to a queue. +// component_types: The type of each component in a tuple. // -// Returns Shape is `[...]`. -func MatrixDeterminant(scope *Scope, input tf.Output) (output tf.Output) { +// Returns One or more tensors that were dequeued as a tuple. +func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"component_types": component_types} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "MatrixDeterminant", + Type: "QueueDequeueV2", Input: []tf.Input{ - input, + handle, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes sin of x element-wise. -func Sin(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "Sin", - Input: []tf.Input{ - x, - }, + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("QueueDequeueV2", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + return components } -// Computes Psi, the derivative of Lgamma (the log of the absolute value of +// QueueEnqueueV2Attr is an optional argument to QueueEnqueueV2. +type QueueEnqueueV2Attr func(optionalAttr) + +// QueueEnqueueV2TimeoutMs sets the optional timeout_ms attribute to value. // -// `Gamma(x)`), element-wise. -func Digamma(scope *Scope, x tf.Output) (y tf.Output) { +// value: If the queue is full, this operation will block for up to +// timeout_ms milliseconds. +// Note: This option is not supported yet. 
+// If not specified, defaults to -1 +func QueueEnqueueV2TimeoutMs(value int64) QueueEnqueueV2Attr { + return func(m optionalAttr) { + m["timeout_ms"] = value + } +} + +// Enqueues a tuple of one or more tensors in the given queue. +// +// The components input has k elements, which correspond to the components of +// tuples stored in the given queue. +// +// N.B. If the queue is full, this operation will block until the given +// element has been enqueued (or 'timeout_ms' elapses, if specified). +// +// Arguments: +// handle: The handle to a queue. +// components: One or more tensors from which the enqueued tensors should be taken. +// +// Returns the created operation. +func QueueEnqueueV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Digamma", + Type: "QueueEnqueueV2", Input: []tf.Input{ - x, + handle, tf.OutputList(components), }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Conv2DBackpropFilterAttr is an optional argument to Conv2DBackpropFilter. -type Conv2DBackpropFilterAttr func(optionalAttr) +// MfccAttr is an optional argument to Mfcc. +type MfccAttr func(optionalAttr) -// Conv2DBackpropFilterUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value. -// If not specified, defaults to true -func Conv2DBackpropFilterUseCudnnOnGpu(value bool) Conv2DBackpropFilterAttr { +// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value. +// +// value: The highest frequency to use when calculating the +// ceptstrum. 
+// If not specified, defaults to 4000 +func MfccUpperFrequencyLimit(value float32) MfccAttr { return func(m optionalAttr) { - m["use_cudnn_on_gpu"] = value + m["upper_frequency_limit"] = value } } -// Conv2DBackpropFilterExplicitPaddings sets the optional explicit_paddings attribute to value. +// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value. // -// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. For the ith -// dimension, the amount of padding inserted before and after the dimension is -// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If -// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty. -// If not specified, defaults to <> -func Conv2DBackpropFilterExplicitPaddings(value []int64) Conv2DBackpropFilterAttr { +// value: The lowest frequency to use when calculating the +// ceptstrum. +// If not specified, defaults to 20 +func MfccLowerFrequencyLimit(value float32) MfccAttr { return func(m optionalAttr) { - m["explicit_paddings"] = value + m["lower_frequency_limit"] = value } } -// Conv2DBackpropFilterDataFormat sets the optional data_format attribute to value. +// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr { +// value: Resolution of the Mel bank used internally. 
+// If not specified, defaults to 40 +func MfccFilterbankChannelCount(value int64) MfccAttr { return func(m optionalAttr) { - m["data_format"] = value + m["filterbank_channel_count"] = value } } -// Conv2DBackpropFilterDilations sets the optional dilations attribute to value. +// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func Conv2DBackpropFilterDilations(value []int64) Conv2DBackpropFilterAttr { +// value: How many output channels to produce per time slice. +// If not specified, defaults to 13 +func MfccDctCoefficientCount(value int64) MfccAttr { return func(m optionalAttr) { - m["dilations"] = value + m["dct_coefficient_count"] = value } } -// Computes the gradients of convolution with respect to the filter. +// Transforms a spectrogram into a form that's useful for speech recognition. // -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 4-D -// `[filter_height, filter_width, in_channels, out_channels]` tensor. -// out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. Must be in the same order as the dimension specified with -// format. -// padding: The type of padding algorithm to use. +// Mel Frequency Cepstral Coefficients are a way of representing audio data that's +// been effective as an input feature for machine learning. 
They are created by +// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the +// higher frequencies that are less significant to the human ear. They have a long +// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum +// is a good resource to learn more. // -// Returns 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. -// the `filter` input of the convolution. -func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropFilterAttr) (output tf.Output) { +// Arguments: +// spectrogram: Typically produced by the Spectrogram op, with magnitude_squared +// set to true. +// sample_rate: How many samples per second the source audio used. +func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Conv2DBackpropFilter", + Type: "Mfcc", Input: []tf.Input{ - input, filter_sizes, out_backprop, + spectrogram, sample_rate, }, Attrs: attrs, } @@ -7618,525 +7137,391 @@ func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, return op.Output(0) } -// Returns the number of work units this Reader has finished processing. +// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2. +type PaddingFIFOQueueV2Attr func(optionalAttr) + +// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value. // -// Arguments: -// reader_handle: Handle to a Reader. 
-func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderNumWorkUnitsCompletedV2", - Input: []tf.Input{ - reader_handle, - }, +// value: The shape of each component in a value. The length of this attr must +// be either 0 or the same as the length of component_types. +// Shapes of fixed rank but variable size are allowed by setting +// any shape dimension to -1. In this case, the inputs' shape may vary along +// the given dimension, and DequeueMany will pad the given dimension with +// zeros up to the maximum shape of all elements in the given batch. +// If the length of this attr is 0, different queue elements may have +// different ranks and shapes, but only one element may be dequeued at a time. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr { + return func(m optionalAttr) { + m["shapes"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Creates a dataset that contains the elements of `input_dataset` ignoring errors. -func ExperimentalIgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalIgnoreErrorsDataset", - Input: []tf.Input{ - input_dataset, - }, - Attrs: attrs, +// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value. +// +// value: The upper bound on the number of elements in this queue. +// Negative numbers mean no limit. 
+// If not specified, defaults to -1 +func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr { + return func(m optionalAttr) { + m["capacity"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes the log of the absolute value of `Gamma(x)` element-wise. -func Lgamma(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return +// PaddingFIFOQueueV2Container sets the optional container attribute to value. +// +// value: If non-empty, this queue is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr { + return func(m optionalAttr) { + m["container"] = value } - opspec := tf.OpSpec{ - Type: "Lgamma", - Input: []tf.Input{ - x, - }, +} + +// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this queue will be shared under the given name +// across multiple sessions. +// If not specified, defaults to "" +func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes the reverse mode backpropagated gradient of the Cholesky algorithm. +// A queue that produces elements in first-in first-out order. // -// For an explanation see "Differentiation of the Cholesky algorithm" by -// Iain Murray http://arxiv.org/abs/1602.07527. +// Variable-size shapes are allowed by setting the corresponding shape dimensions +// to 0 in the shape attr. In this case DequeueMany will pad up to the maximum +// size of any given element in the minibatch. See below for details. // // Arguments: -// l: Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`. -// Algorithm depends only on lower triangular part of the innermost matrices of -// this tensor. 
-// grad: df/dl where f is some scalar function. Shape is `[..., M, M]`. -// Algorithm depends only on lower triangular part of the innermost matrices of -// this tensor. +// component_types: The type of each component in a value. // -// Returns Symmetrized version of df/dA . Shape is `[..., M, M]` -func CholeskyGrad(scope *Scope, l tf.Output, grad tf.Output) (output tf.Output) { +// Returns The handle to the queue. +func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "CholeskyGrad", - Input: []tf.Input{ - l, grad, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that emits each dim-0 slice of `components` once. -func TensorSliceDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return + attrs := map[string]interface{}{"component_types": component_types} + for _, a := range optional { + a(attrs) } - attrs := map[string]interface{}{"output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "TensorSliceDataset", - Input: []tf.Input{ - tf.OutputList(components), - }, + Type: "PaddingFIFOQueueV2", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the index of a data point that should be added to the seed set. +// Interleave the values from the `data` tensors into a single tensor. // -// Entries in distances are assumed to be squared distances of candidate points to -// the already sampled centers in the seed set. The op constructs one Markov chain -// of the k-MC^2 algorithm and returns the index of one candidate point to be added -// as an additional cluster center. +// Builds a merged tensor such that // -// Arguments: -// distances: Vector with squared distances to the closest previously sampled cluster center -// for each candidate point. -// seed: Scalar. 
Seed for initializing the random number generator. +// ```python +// merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...] +// ``` // -// Returns Scalar with the index of the sampled point. -func KMC2ChainInitialization(scope *Scope, distances tf.Output, seed tf.Output) (index tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "KMC2ChainInitialization", - Input: []tf.Input{ - distances, seed, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes hyperbolic sine of x element-wise. -func Sinh(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sinh", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the sum along sparse segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. +// For example, if each `indices[m]` is scalar or vector, we have // -// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first -// dimension, selecting a subset of dimension 0, specified by `indices`. +// ```python +// # Scalar indices: +// merged[indices[m], ...] = data[m][...] // -// For example: +// # Vector indices: +// merged[indices[m][i], ...] = data[m][i, ...] +// ``` // -// ```python -// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) +// Each `data[i].shape` must start with the corresponding `indices[i].shape`, +// and the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we +// must have `data[i].shape = indices[i].shape + constant`. In terms of this +// `constant`, the output shape is // -// # Select two rows, one segment. -// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0])) -// # => [[0 0 0 0]] +// merged.shape = [max(indices)] + constant // -// # Select two rows, two segment. 
-// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1])) -// # => [[ 1 2 3 4] -// # [-1 -2 -3 -4]] +// Values may be merged in parallel, so if an index appears in both `indices[m][i]` +// and `indices[n][j]`, the result may be invalid. This differs from the normal +// DynamicStitch operator that defines the behavior in that case. // -// # Select all rows, two segments. -// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1])) -// # => [[0 0 0 0] -// # [5 6 7 8]] +// For example: // -// # Which is equivalent to: -// tf.segment_sum(c, tf.constant([0, 0, 1])) +// ```python +// indices[0] = 6 +// indices[1] = [4, 1] +// indices[2] = [[5, 2], [0, 3]] +// data[0] = [61, 62] +// data[1] = [[41, 42], [11, 12]] +// data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]] +// merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42], +// [51, 52], [61, 62]] // ``` // -// Arguments: +// This method can be used to merge partitions created by `dynamic_partition` +// as illustrated on the following example: // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// ```python +// # Apply function (increments x_i) on elements for which a certain condition +// # apply (x_i != -1 in this example). +// x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4]) +// condition_mask=tf.not_equal(x,tf.constant(-1.)) +// partitioned_data = tf.dynamic_partition( +// x, tf.cast(condition_mask, tf.int32) , 2) +// partitioned_data[1] = partitioned_data[1] + 1.0 +// condition_indices = tf.dynamic_partition( +// tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2) +// x = tf.dynamic_stitch(condition_indices, partitioned_data) +// # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain +// # unchanged. +// ``` // -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. 
-func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { +//
+// +//
+func ParallelDynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseSegmentSum", + Type: "ParallelDynamicStitch", Input: []tf.Input{ - data, indices, segment_ids, + tf.OutputList(indices), tf.OutputList(data), }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes natural logarithm of x element-wise. +// Partitions `data` into `num_partitions` tensors using indices from `partitions`. // -// I.e., \\(y = \log_e x\\). -func Log(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Log", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Rounds the values of a tensor to the nearest integer, element-wise. +// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` +// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` +// are placed in `outputs[i]` in lexicographic order of `js`, and the first +// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. +// In detail, // -// Rounds half to even. Also known as bankers rounding. If you want to round -// according to the current system rounding mode use std::cint. -func Round(scope *Scope, x tf.Output) (y tf.Output) { +// ```python +// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] +// +// outputs[i] = pack([data[js, ...] for js if partitions[js] == i]) +// ``` +// +// `data.shape` must start with `partitions.shape`. +// +// For example: +// +// ```python +// # Scalar partitions. +// partitions = 1 +// num_partitions = 2 +// data = [10, 20] +// outputs[0] = [] # Empty with shape [0, 2] +// outputs[1] = [[10, 20]] +// +// # Vector partitions. 
+// partitions = [0, 0, 1, 1, 0] +// num_partitions = 2 +// data = [10, 20, 30, 40, 50] +// outputs[0] = [10, 20, 50] +// outputs[1] = [30, 40] +// ``` +// +// See `dynamic_stitch` for an example on how to merge partitions back. +// +//
+// +//
+// +// Arguments: +// +// partitions: Any shape. Indices in the range `[0, num_partitions)`. +// num_partitions: The number of partitions to output. +func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_partitions": num_partitions} opspec := tf.OpSpec{ - Type: "Round", + Type: "DynamicPartition", Input: []tf.Input{ - x, + data, partitions, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes reciprocal of square root of x element-wise. -// -// I.e., \\(y = 1 / \sqrt{x}\\). -func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "Rsqrt", - Input: []tf.Input{ - x, - }, + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("DynamicPartition", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + return outputs } -// Get the value of the tensor specified by its handle. +// Produces a string handle for the given MultiDeviceIterator. // // Arguments: -// handle: The handle for a tensor stored in the session state. -// dtype: The type of the output value. +// multi_device_iterator: A MultiDeviceIterator resource. // -// Returns The tensor for the given handle. -func GetSessionTensor(scope *Scope, handle tf.Output, dtype tf.DataType) (value tf.Output) { +// Returns A string representing the resource. 
+func MultiDeviceIteratorToStringHandle(scope *Scope, multi_device_iterator tf.Output) (string_handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "GetSessionTensor", + Type: "MultiDeviceIteratorToStringHandle", Input: []tf.Input{ - handle, + multi_device_iterator, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the gradient for the sqrt of `x` wrt its input. +// Checks whether a tree has been initialized. // -// Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy` -// is the corresponding input gradient. -func SqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { +// Arguments: +// tree_handle: Handle to the tree. +// +// Returns Whether the tree is initialized. +func TensorForestTreeIsInitializedOp(scope *Scope, tree_handle tf.Output) (is_initialized tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SqrtGrad", + Type: "TensorForestTreeIsInitializedOp", Input: []tf.Input{ - y, dy, + tree_handle, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// MatrixInverseAttr is an optional argument to MatrixInverse. -type MatrixInverseAttr func(optionalAttr) - -// MatrixInverseAdjoint sets the optional adjoint attribute to value. -// If not specified, defaults to false -func MatrixInverseAdjoint(value bool) MatrixInverseAttr { - return func(m optionalAttr) { - m["adjoint"] = value - } -} - -// Computes the inverse of one or more square invertible matrices or their -// -// adjoints (conjugate transposes). -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor of the same shape as the input -// containing the inverse for all input submatrices `[..., :, :]`. -// -// The op uses LU decomposition with partial pivoting to compute the inverses. -// -// If a matrix is not invertible there is no guarantee what the op does. 
It -// may detect the condition and raise an exception or it may simply return a -// garbage result. +// Gets next element for the provided shard number. // // Arguments: -// input: Shape is `[..., M, M]`. -// -// Returns Shape is `[..., M, M]`. +// multi_device_iterator: A MultiDeviceIterator resource. +// shard_num: Integer representing which shard to fetch data for. +// incarnation_id: Which incarnation of the MultiDeviceIterator is running. +// output_types: The type list for the return values. +// output_shapes: The list of shapes being produced. // -// @compatibility(numpy) -// Equivalent to np.linalg.inv -// @end_compatibility -func MatrixInverse(scope *Scope, input tf.Output, optional ...MatrixInverseAttr) (output tf.Output) { +// Returns Result of the get_next on the dataset. +func MultiDeviceIteratorGetNextFromShard(scope *Scope, multi_device_iterator tf.Output, shard_num tf.Output, incarnation_id tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "MatrixInverse", + Type: "MultiDeviceIteratorGetNextFromShard", Input: []tf.Input{ - input, + multi_device_iterator, shard_num, incarnation_id, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes square of x element-wise. -// -// I.e., \\(y = x * x = x^2\\). -func Square(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "Square", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise. 
-// -// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) -// ](http://arxiv.org/abs/1511.07289) -func Elu(scope *Scope, features tf.Output) (activations tf.Output) { - if scope.Err() != nil { + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("MultiDeviceIteratorGetNextFromShard", err) return } - opspec := tf.OpSpec{ - Type: "Elu", - Input: []tf.Input{ - features, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) + return components } -// Computes the reciprocal of x element-wise. +// Initializes the multi device iterator with the given dataset. // -// I.e., \\(y = 1 / x\\). -func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) { +// Arguments: +// dataset: Dataset to be iterated upon. +// multi_device_iterator: A MultiDeviceIteratorResource. +// max_buffer_size: The maximum size of the host side per device buffer to keep. +// +// Returns An int64 indicating which incarnation of the MultiDeviceIterator +// is running. +func MultiDeviceIteratorInit(scope *Scope, dataset tf.Output, multi_device_iterator tf.Output, max_buffer_size tf.Output) (incarnation_id tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Reciprocal", + Type: "MultiDeviceIteratorInit", Input: []tf.Input{ - x, + dataset, multi_device_iterator, max_buffer_size, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns a batched matrix tensor with new batched diagonal values. +// Copy a tensor setting everything outside a central band in each innermost matrix // -// Given `input` and `diagonal`, this operation returns a tensor with the -// same shape and values as `input`, except for the main diagonal of the -// innermost matrices. These will be overwritten by the values in `diagonal`. +// to zero. 
// -// The output is computed as follows: +// The `band` part is computed as follows: +// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a +// tensor with the same shape where // -// Assume `input` has `k+1` dimensions `[I, J, K, ..., M, N]` and `diagonal` has -// `k` dimensions `[I, J, K, ..., min(M, N)]`. Then the output is a -// tensor of rank `k+1` with dimensions `[I, J, K, ..., M, N]` where: +// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`. // -// * `output[i, j, k, ..., m, n] = diagonal[i, j, k, ..., n]` for `m == n`. -// * `output[i, j, k, ..., m, n] = input[i, j, k, ..., m, n]` for `m != n`. +// The indicator function // -// Arguments: -// input: Rank `k+1`, where `k >= 1`. -// diagonal: Rank `k`, where `k >= 1`. +// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && +// (num_upper < 0 || (n-m) <= num_upper)`. // -// Returns Rank `k+1`, with `output.shape = input.shape`. -func MatrixSetDiag(scope *Scope, input tf.Output, diagonal tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MatrixSetDiag", - Input: []tf.Input{ - input, diagonal, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the element-wise max of two SparseTensors. +// For example: // -// Assumes the two SparseTensors have the same shape, i.e., no broadcasting. +// ``` +// # if 'input' is [[ 0, 1, 2, 3] +// [-1, 0, 1, 2] +// [-2, -1, 0, 1] +// [-3, -2, -1, 0]], // -// Arguments: -// a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, in the canonical lexicographic ordering. -// a_values: 1-D. `N` non-empty values corresponding to `a_indices`. -// a_shape: 1-D. Shape of the input SparseTensor. -// b_indices: counterpart to `a_indices` for the other operand. -// b_values: counterpart to `a_values` for the other operand; must be of the same dtype. 
-// b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. +// tf.matrix_band_part(input, 1, -1) ==> [[ 0, 1, 2, 3] +// [-1, 0, 1, 2] +// [ 0, -1, 0, 1] +// [ 0, 0, -1, 0]], // -// Returns 2-D. The indices of the output SparseTensor.1-D. The values of the output SparseTensor. -func SparseSparseMaximum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSparseMaximum", - Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// Computes the reciprocal of x element-wise. +// tf.matrix_band_part(input, 2, 1) ==> [[ 0, 1, 0, 0] +// [-1, 0, 1, 0] +// [-2, -1, 0, 1] +// [ 0, -2, -1, 0]] +// ``` // -// I.e., \\(y = 1 / x\\). -func Inv(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Inv", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Creates a dataset that batches input elements into a SparseTensor. +// Useful special cases: // -// Arguments: -// input_dataset: A handle to an input dataset. Must have a single component. -// batch_size: A scalar representing the number of elements to accumulate in a -// batch. -// row_shape: A vector representing the dense shape of each row in the produced -// SparseTensor. The shape may be partially specified, using `-1` to indicate -// that a particular dimension should use the maximum size of all batch elements. +// ``` +// tf.matrix_band_part(input, 0, -1) ==> Upper triangular part. +// tf.matrix_band_part(input, -1, 0) ==> Lower triangular part. +// tf.matrix_band_part(input, 0, 0) ==> Diagonal. +// ``` // +// Arguments: +// input: Rank `k` tensor. 
+// num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire +// lower triangle. +// num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep +// entire upper triangle. // -func ExperimentalDenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ExperimentalDenseToSparseBatchDataset", - Input: []tf.Input{ - input_dataset, batch_size, row_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CastAttr is an optional argument to Cast. -type CastAttr func(optionalAttr) - -// CastTruncate sets the optional Truncate attribute to value. -// If not specified, defaults to false -func CastTruncate(value bool) CastAttr { - return func(m optionalAttr) { - m["Truncate"] = value - } -} - -// Cast x of type SrcT to y of DstT. -func Cast(scope *Scope, x tf.Output, DstT tf.DataType, optional ...CastAttr) (y tf.Output) { +// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor. +func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"DstT": DstT} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Cast", + Type: "MatrixBandPart", Input: []tf.Input{ - x, + input, num_lower, num_upper, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) @@ -8215,60 +7600,6 @@ func RegexReplace(scope *Scope, input tf.Output, pattern tf.Output, rewrite tf.O return op.Output(0) } -// ComplexAbsAttr is an optional argument to ComplexAbs. -type ComplexAbsAttr func(optionalAttr) - -// ComplexAbsTout sets the optional Tout attribute to value. 
-// If not specified, defaults to DT_FLOAT -func ComplexAbsTout(value tf.DataType) ComplexAbsAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Computes the complex absolute value of a tensor. -// -// Given a tensor `x` of complex numbers, this operation returns a tensor of type -// `float` or `double` that is the absolute value of each element in `x`. All -// elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute -// value is computed as \\( \sqrt{a^2 + b^2}\\). -func ComplexAbs(scope *Scope, x tf.Output, optional ...ComplexAbsAttr) (y tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ComplexAbs", - Input: []tf.Input{ - x, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the truth value of x AND y element-wise. -// -// *NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func LogicalAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogicalAnd", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Quantized Batch normalization. // // This op is deprecated and will be removed in the future. Prefer @@ -8707,24 +8038,6 @@ func LogSoftmax(scope *Scope, logits tf.Output) (logsoftmax tf.Output) { return op.Output(0) } -// Returns the truth value of (x <= y) element-wise. -// -// *NOTE*: `LessEqual` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LessEqual", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Computes softmax activations. // // For each batch `i` and class `j` we have @@ -8794,85 +8107,33 @@ func DecodeBmp(scope *Scope, contents tf.Output, optional ...DecodeBmpAttr) (ima return op.Output(0) } -// BatchMatMulAttr is an optional argument to BatchMatMul. -type BatchMatMulAttr func(optionalAttr) - -// BatchMatMulAdjX sets the optional adj_x attribute to value. -// -// value: If `True`, adjoint the slices of `x`. Defaults to `False`. -// If not specified, defaults to false -func BatchMatMulAdjX(value bool) BatchMatMulAttr { - return func(m optionalAttr) { - m["adj_x"] = value - } -} - -// BatchMatMulAdjY sets the optional adj_y attribute to value. -// -// value: If `True`, adjoint the slices of `y`. Defaults to `False`. -// If not specified, defaults to false -func BatchMatMulAdjY(value bool) BatchMatMulAttr { - return func(m optionalAttr) { - m["adj_y"] = value - } -} - -// Multiplies slices of two tensors in batches. -// -// Multiplies all slices of `Tensor` `x` and `y` (each slice can be -// viewed as an element of a batch), and arranges the individual results -// in a single output tensor of the same batch size. Each of the -// individual slices can optionally be adjointed (to adjoint a matrix -// means to transpose and conjugate it) before multiplication by setting -// the `adj_x` or `adj_y` flag to `True`, which are by default `False`. -// -// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]` -// and `[..., r_y, c_y]`. 
-// -// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where: -// -// r_o = c_x if adj_x else r_x -// c_o = r_y if adj_y else c_y -// -// It is computed as: -// -// output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :]) -// -// Arguments: -// x: 2-D or higher with shape `[..., r_x, c_x]`. -// y: 2-D or higher with shape `[..., r_y, c_y]`. +// Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise. // -// Returns 3-D or higher with shape `[..., r_o, c_o]` -func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMulAttr) (output tf.Output) { +// See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs) +// ](http://arxiv.org/abs/1511.07289) +func Elu(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "BatchMatMul", + Type: "Elu", Input: []tf.Input{ - x, y, + features, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns which elements of x are NaN. +// Computes square of x element-wise. // -// @compatibility(numpy) -// Equivalent to np.isnan -// @end_compatibility -func IsNan(scope *Scope, x tf.Output) (y tf.Output) { +// I.e., \\(y = x * x = x^2\\). +func Square(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IsNan", + Type: "Square", Input: []tf.Input{ x, }, @@ -8881,125 +8142,6 @@ func IsNan(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } -// Identity op for gradient debugging. -// -// This op is hidden from public in Python. It is used by TensorFlow Debugger to -// register gradient tensors for gradient debugging. -// This op operates on non-reference-type tensors. 
-func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DebugGradientIdentity", - Input: []tf.Input{ - input, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceSparseApplyAdadeltaAttr is an optional argument to ResourceSparseApplyAdadelta. -type ResourceSparseApplyAdadeltaAttr func(optionalAttr) - -// ResourceSparseApplyAdadeltaUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyAdadeltaUseLocking(value bool) ResourceSparseApplyAdadeltaAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// var: Should be from a Variable(). -// -// Arguments: -// -// accum: Should be from a Variable(). -// accum_update: : Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// rho: Decay factor. Must be a scalar. -// epsilon: Constant factor. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// -// Returns the created operation. -func ResourceSparseApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdadeltaAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdadelta", - Input: []tf.Input{ - var_, accum, accum_update, lr, rho, epsilon, grad, indices, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Checks whether a tree has been initialized. -// -// Arguments: -// tree_handle: Handle to the tree. 
-// -// Returns Whether the tree is initialized. -func TensorForestTreeIsInitializedOp(scope *Scope, tree_handle tf.Output) (is_initialized tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorForestTreeIsInitializedOp", - Input: []tf.Input{ - tree_handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Gets next element for the provided shard number. -// -// Arguments: -// multi_device_iterator: A MultiDeviceIterator resource. -// shard_num: Integer representing which shard to fetch data for. -// incarnation_id: Which incarnation of the MultiDeviceIterator is running. -// output_types: The type list for the return values. -// output_shapes: The list of shapes being produced. -// -// Returns Result of the get_next on the dataset. -func MultiDeviceIteratorGetNextFromShard(scope *Scope, multi_device_iterator tf.Output, shard_num tf.Output, incarnation_id tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "MultiDeviceIteratorGetNextFromShard", - Input: []tf.Input{ - multi_device_iterator, shard_num, incarnation_id, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("MultiDeviceIteratorGetNextFromShard", err) - return - } - return components -} - // LeakyReluGradAttr is an optional argument to LeakyReluGrad. type LeakyReluGradAttr func(optionalAttr) @@ -9038,23 +8180,6 @@ func LeakyReluGrad(scope *Scope, gradients tf.Output, features tf.Output, option return op.Output(0) } -// Deprecated. 
Use TensorArrayGradV3 -// -// DEPRECATED at GraphDef version 26: Use TensorArrayWriteV3 -func TensorArrayWriteV2(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArrayWriteV2", - Input: []tf.Input{ - handle, index, value, flow_in, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // LeakyReluAttr is an optional argument to LeakyRelu. type LeakyReluAttr func(optionalAttr) @@ -9320,35 +8445,6 @@ func Equal(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { return op.Output(0) } -// Computes the gradient of morphological 2-D dilation with respect to the input. -// -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, depth]`. -// filter: 3-D with shape `[filter_height, filter_width, depth]`. -// out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`. -// strides: 1-D of length 4. The stride of the sliding window for each dimension of -// the input tensor. Must be: `[1, stride_height, stride_width, 1]`. -// rates: 1-D of length 4. The input stride for atrous morphological dilation. -// Must be: `[1, rate_height, rate_width, 1]`. -// padding: The type of padding algorithm to use. -// -// Returns 4-D with shape `[batch, in_height, in_width, depth]`. -func Dilation2DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (in_backprop tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} - opspec := tf.OpSpec{ - Type: "Dilation2DBackpropInput", - Input: []tf.Input{ - input, filter, out_backprop, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Compute the polygamma function \\(\psi^{(n)}(x)\\). 
// // The polygamma function is defined as: @@ -9371,35 +8467,6 @@ func Polygamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { return op.Output(0) } -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// input: The original input. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the -// input of `max_pool`. -// argmax: The indices of the maximum values chosen for each output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients of gradients w.r.t. the input of `max_pool`. -func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "MaxPoolGradGradWithArgmax", - Input: []tf.Input{ - input, grad, argmax, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // MaxPoolGradGradV2Attr is an optional argument to MaxPoolGradGradV2. type MaxPoolGradGradV2Attr func(optionalAttr) @@ -9522,6 +8589,21 @@ func MutexV2(scope *Scope, optional ...MutexV2Attr) (resource tf.Output) { return op.Output(0) } +// Connects N inputs to an N-way replicated TPU computation. +func TPUReplicatedInput(scope *Scope, inputs []tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TPUReplicatedInput", + Input: []tf.Input{ + tf.OutputList(inputs), + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // AvgPool3DAttr is an optional argument to AvgPool3D. 
type AvgPool3DAttr func(optionalAttr) @@ -9569,27 +8651,6 @@ func AvgPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, pa return op.Output(0) } -// Returns element-wise remainder of division. This emulates C semantics in that -// -// the result here is consistent with a truncating divide. E.g. -// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`. -// -// *NOTE*: `Mod` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Mod", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // DepthToSpaceAttr is an optional argument to DepthToSpace. type DepthToSpaceAttr func(optionalAttr) @@ -9778,37 +8839,20 @@ func Conv3DBackpropInputV2(scope *Scope, input_sizes tf.Output, filter tf.Output return op.Output(0) } -// Computes square root of x element-wise. -// -// I.e., \\(y = \sqrt{x} = x^{1/2}\\). -func Sqrt(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sqrt", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Conv3DBackpropFilterAttr is an optional argument to Conv3DBackpropFilter. -type Conv3DBackpropFilterAttr func(optionalAttr) +// Conv3DBackpropInputAttr is an optional argument to Conv3DBackpropInput. +type Conv3DBackpropInputAttr func(optionalAttr) -// Conv3DBackpropFilterDilations sets the optional dilations attribute to value. +// Conv3DBackpropInputDilations sets the optional dilations attribute to value. 
// If not specified, defaults to -func Conv3DBackpropFilterDilations(value []int64) Conv3DBackpropFilterAttr { +func Conv3DBackpropInputDilations(value []int64) Conv3DBackpropInputAttr { return func(m optionalAttr) { m["dilations"] = value } } -// Computes the gradients of 3-D convolution with respect to the filter. +// Computes the gradients of 3-D convolution with respect to the input. // -// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2 +// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2 // // Arguments: // input: Shape `[batch, depth, rows, cols, in_channels]`. @@ -9819,7 +8863,7 @@ func Conv3DBackpropFilterDilations(value []int64) Conv3DBackpropFilterAttr { // strides: 1-D tensor of length 5. The stride of the sliding window for each // dimension of `input`. Must have `strides[0] = strides[4] = 1`. // padding: The type of padding algorithm to use. -func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterAttr) (output tf.Output) { +func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -9828,7 +8872,7 @@ func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_b a(attrs) } opspec := tf.OpSpec{ - Type: "Conv3DBackpropFilter", + Type: "Conv3DBackpropInput", Input: []tf.Input{ input, filter, out_backprop, }, @@ -9838,24 +8882,6 @@ func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_b return op.Output(0) } -// Computes the gradient for the rsqrt of `x` wrt its input. -// -// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy` -// is the corresponding input gradient. 
-func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RsqrtGrad", - Input: []tf.Input{ - y, dy, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // DepthwiseConv2dNativeAttr is an optional argument to DepthwiseConv2dNative. type DepthwiseConv2dNativeAttr func(optionalAttr) @@ -9933,78 +8959,6 @@ func DepthwiseConv2dNative(scope *Scope, input tf.Output, filter tf.Output, stri return op.Output(0) } -// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2. -type MaxPoolGradV2Attr func(optionalAttr) - -// MaxPoolGradV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients w.r.t. the output of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients w.r.t. the input to `max_pool`. 
-func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradV2", - Input: []tf.Input{ - orig_input, orig_output, grad, ksize, strides, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Restore a reader to a previously saved state. -// -// Not all Readers support being restored, so this can produce an -// Unimplemented error. -// -// Arguments: -// reader_handle: Handle to a Reader. -// state: Result of a ReaderSerializeState of a Reader with type -// matching reader_handle. -// -// Returns the created operation. -func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderRestoreStateV2", - Input: []tf.Input{ - reader_handle, state, - }, - } - return scope.AddOperation(opspec) -} - // MaxPoolGradAttr is an optional argument to MaxPoolGrad. type MaxPoolGradAttr func(optionalAttr) @@ -10136,6 +9090,129 @@ func CropAndResize(scope *Scope, image tf.Output, boxes tf.Output, box_ind tf.Ou return op.Output(0) } +// Conv2DBackpropFilterAttr is an optional argument to Conv2DBackpropFilter. +type Conv2DBackpropFilterAttr func(optionalAttr) + +// Conv2DBackpropFilterUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value. +// If not specified, defaults to true +func Conv2DBackpropFilterUseCudnnOnGpu(value bool) Conv2DBackpropFilterAttr { + return func(m optionalAttr) { + m["use_cudnn_on_gpu"] = value + } +} + +// Conv2DBackpropFilterExplicitPaddings sets the optional explicit_paddings attribute to value. +// +// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. 
For the ith +// dimension, the amount of padding inserted before and after the dimension is +// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If +// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty. +// If not specified, defaults to <> +func Conv2DBackpropFilterExplicitPaddings(value []int64) Conv2DBackpropFilterAttr { + return func(m optionalAttr) { + m["explicit_paddings"] = value + } +} + +// Conv2DBackpropFilterDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func Conv2DBackpropFilterDataFormat(value string) Conv2DBackpropFilterAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Conv2DBackpropFilterDilations sets the optional dilations attribute to value. +// +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. +// If not specified, defaults to +func Conv2DBackpropFilterDilations(value []int64) Conv2DBackpropFilterAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of convolution with respect to the filter. +// +// Arguments: +// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. +// filter_sizes: An integer vector representing the tensor shape of `filter`, +// where `filter` is a 4-D +// `[filter_height, filter_width, in_channels, out_channels]` tensor. 
+// out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`. +// Gradients w.r.t. the output of the convolution. +// strides: The stride of the sliding window for each dimension of the input +// of the convolution. Must be in the same order as the dimension specified with +// format. +// padding: The type of padding algorithm to use. +// +// Returns 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. +// the `filter` input of the convolution. +func Conv2DBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropFilterAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Conv2DBackpropFilter", + Input: []tf.Input{ + input, filter_sizes, out_backprop, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes Psi, the derivative of Lgamma (the log of the absolute value of +// +// `Gamma(x)`), element-wise. +func Digamma(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Digamma", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the number of work units this Reader has finished processing. +// +// Arguments: +// reader_handle: Handle to a Reader. +func ReaderNumWorkUnitsCompletedV2(scope *Scope, reader_handle tf.Output) (units_completed tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderNumWorkUnitsCompletedV2", + Input: []tf.Input{ + reader_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Conv2DAttr is an optional argument to Conv2D. 
type Conv2DAttr func(optionalAttr) @@ -10306,36 +9383,66 @@ func SparseFillEmptyRows(scope *Scope, indices tf.Output, values tf.Output, dens return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// Reduces `input` from `num_devices` using `reduction` to a single device. +// LoadTPUEmbeddingADAMParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingADAMParametersGradAccumDebug. +type LoadTPUEmbeddingADAMParametersGradAccumDebugAttr func(optionalAttr) + +// LoadTPUEmbeddingADAMParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// Reduces `input` from `num_devices` using `reduction` to a single device. +// REQUIRES: value >= -1 +func LoadTPUEmbeddingADAMParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingADAMParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingADAMParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingADAMParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingADAMParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load ADAM embedding parameters with debug support. // -// The graph should be constructed so that all inputs have a valid device -// assignment, and the op itself is assigned one of these devices. +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // -// input: The input to the reduction. -// data: the value of the reduction across all `num_devices` devices. -// reduction: the reduction operation to perform. 
-func NcclReduce(scope *Scope, input []tf.Output, reduction string) (data tf.Output) { +// Arguments: +// parameters: Value of parameters used in the ADAM optimization algorithm. +// momenta: Value of momenta used in the ADAM optimization algorithm. +// velocities: Value of velocities used in the ADAM optimization algorithm. +// gradient_accumulators: Value of gradient_accumulators used in the ADAM optimization algorithm. +// +// +// +// Returns the created operation. +func LoadTPUEmbeddingADAMParametersGradAccumDebug(scope *Scope, parameters tf.Output, momenta tf.Output, velocities tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingADAMParametersGradAccumDebugAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"reduction": reduction} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "NcclReduce", + Type: "LoadTPUEmbeddingADAMParametersGradAccumDebug", Input: []tf.Input{ - tf.OutputList(input), + parameters, momenta, velocities, gradient_accumulators, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// BiasAddGradAttr is an optional argument to BiasAddGrad. -type BiasAddGradAttr func(optionalAttr) +// BiasAddAttr is an optional argument to BiasAdd. +type BiasAddAttr func(optionalAttr) -// BiasAddGradDataFormat sets the optional data_format attribute to value. +// BiasAddDataFormat sets the optional data_format attribute to value. // // value: Specify the data format of the input and output data. With the // default format "NHWC", the bias tensor will be added to the last dimension @@ -10345,23 +9452,23 @@ type BiasAddGradAttr func(optionalAttr) // The tensor will be added to "in_channels", the third-to-the-last // dimension. 
// If not specified, defaults to "NHWC" -func BiasAddGradDataFormat(value string) BiasAddGradAttr { +func BiasAddDataFormat(value string) BiasAddAttr { return func(m optionalAttr) { m["data_format"] = value } } -// The backward operation for "BiasAdd" on the "bias" tensor. +// Adds `bias` to `value`. // -// It accumulates all the values from out_backprop into the feature dimension. -// For NHWC data format, the feature dimension is the last. For NCHW data format, -// the feature dimension is the third-to-last. +// This is a special case of `tf.add` where `bias` is restricted to be 1-D. +// Broadcasting is supported, so `value` may have any number of dimensions. // // Arguments: -// out_backprop: Any number of dimensions. +// value: Any number of dimensions. +// bias: 1-D with size the last dimension of `value`. // -// Returns 1-D with size the feature dimension of `out_backprop`. -func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) { +// Returns Broadcasted sum of `value` and `bias`. +func BiasAdd(scope *Scope, value tf.Output, bias tf.Output, optional ...BiasAddAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -10370,9 +9477,9 @@ func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAt a(attrs) } opspec := tf.OpSpec{ - Type: "BiasAddGrad", + Type: "BiasAdd", Input: []tf.Input{ - out_backprop, + value, bias, }, Attrs: attrs, } @@ -10380,13 +9487,151 @@ func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAt return op.Output(0) } -// Returns 0 if x == 0, and x / y otherwise, elementwise. -func Xdivy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// SparseReduceSumSparseAttr is an optional argument to SparseReduceSumSparse. +type SparseReduceSumSparseAttr func(optionalAttr) + +// SparseReduceSumSparseKeepDims sets the optional keep_dims attribute to value. +// +// value: If true, retain reduced dimensions with length 1. 
+// If not specified, defaults to false +func SparseReduceSumSparseKeepDims(value bool) SparseReduceSumSparseAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the sum of elements across dimensions of a SparseTensor. +// +// This Op takes a SparseTensor and is the sparse counterpart to +// `tf.reduce_sum()`. In contrast to SparseReduceSum, this Op returns a +// SparseTensor. +// +// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +// with length 1. +// +// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +// with a single element is returned. Additionally, the axes can be negative, +// which are interpreted according to the indexing rules in Python. +// +// Arguments: +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. Shape of the input SparseTensor. +// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. 
+func SparseReduceSumSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Xdivy", + Type: "SparseReduceSumSparse", + Input: []tf.Input{ + input_indices, input_values, input_shape, reduction_axes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// LoadTPUEmbeddingStochasticGradientDescentParametersAttr is an optional argument to LoadTPUEmbeddingStochasticGradientDescentParameters. +type LoadTPUEmbeddingStochasticGradientDescentParametersAttr func(optionalAttr) + +// LoadTPUEmbeddingStochasticGradientDescentParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func LoadTPUEmbeddingStochasticGradientDescentParametersTableId(value int64) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingStochasticGradientDescentParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingStochasticGradientDescentParametersTableName(value string) LoadTPUEmbeddingStochasticGradientDescentParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load SGD embedding parameters. +// +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. 
+// +// Arguments: +// parameters: Value of parameters used in the stochastic gradient descent optimization algorithm. +// +// +// +// Returns the created operation. +func LoadTPUEmbeddingStochasticGradientDescentParameters(scope *Scope, parameters tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingStochasticGradientDescentParametersAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LoadTPUEmbeddingStochasticGradientDescentParameters", + Input: []tf.Input{ + parameters, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Selects the k nearest centers for each point. +// +// Rows of points are assumed to be input points. Rows of centers are assumed to be +// the list of candidate centers. For each point, the k centers that have least L2 +// distance to it are computed. +// +// Arguments: +// points: Matrix of shape (n, d). Rows are assumed to be input points. +// centers: Matrix of shape (m, d). Rows are assumed to be centers. +// k: Number of nearest centers to return for each point. If k is larger than m, then +// only m centers are returned. +// +// Returns Matrix of shape (n, min(m, k)). Each row contains the indices of the centers +// closest to the corresponding point, ordered by increasing distance.Matrix of shape (n, min(m, k)). Each row contains the squared L2 distance to the +// corresponding center in nearest_center_indices. +func NearestNeighbors(scope *Scope, points tf.Output, centers tf.Output, k tf.Output) (nearest_center_indices tf.Output, nearest_center_distances tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "NearestNeighbors", + Input: []tf.Input{ + points, centers, k, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Returns x * y element-wise. 
+// +// *NOTE*: `Multiply` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Mul(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Mul", Input: []tf.Input{ x, y, }, @@ -10593,261 +9838,6 @@ func KmeansPlusPlusInitialization(scope *Scope, points tf.Output, num_to_sample return op.Output(0) } -// Shuffle dimensions of x according to a permutation. -// -// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy: -// `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` -func Transpose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Transpose", - Input: []tf.Input{ - x, perm, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MinAttr is an optional argument to Min. -type MinAttr func(optionalAttr) - -// MinKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func MinKeepDims(value bool) MinAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} - -// Computes the minimum of elements across dimensions of a tensor. -// -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. -// -// Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. 
-func Min(scope *Scope, input tf.Output, axis tf.Output, optional ...MinAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Min", - Input: []tf.Input{ - input, axis, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the Bessel i1e function of `x` element-wise. -// -// Exponentially scaled modified Bessel function of order 0 defined as -// `bessel_i1e(x) = exp(-abs(x)) bessel_i1(x)`. -// -// This function is faster and numerically stabler than `bessel_i1(x)`. -func BesselI1e(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BesselI1e", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MapClearAttr is an optional argument to MapClear. -type MapClearAttr func(optionalAttr) - -// MapClearCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapClearCapacity(value int64) MapClearAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapClearMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapClearMemoryLimit(value int64) MapClearAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapClearContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapClearContainer(value string) MapClearAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapClearSharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func MapClearSharedName(value string) MapClearAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes all elements in the underlying container. -// -// Returns the created operation. -func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapClear", - - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// DecodeCSVAttr is an optional argument to DecodeCSV. -type DecodeCSVAttr func(optionalAttr) - -// DecodeCSVFieldDelim sets the optional field_delim attribute to value. -// -// value: char delimiter to separate fields in a record. -// If not specified, defaults to "," -func DecodeCSVFieldDelim(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["field_delim"] = value - } -} - -// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. -// -// value: If false, treats double quotation marks as regular -// characters inside of the string fields (ignoring RFC 4180, Section 2, -// Bullet 5). -// If not specified, defaults to true -func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { - return func(m optionalAttr) { - m["use_quote_delim"] = value - } -} - -// DecodeCSVNaValue sets the optional na_value attribute to value. -// -// value: Additional string to recognize as NA/NaN. -// If not specified, defaults to "" -func DecodeCSVNaValue(value string) DecodeCSVAttr { - return func(m optionalAttr) { - m["na_value"] = value - } -} - -// DecodeCSVSelectCols sets the optional select_cols attribute to value. -// If not specified, defaults to <> -func DecodeCSVSelectCols(value []int64) DecodeCSVAttr { - return func(m optionalAttr) { - m["select_cols"] = value - } -} - -// Convert CSV records to tensors. Each column maps to one tensor. 
-// -// RFC 4180 format is expected for the CSV records. -// (https://tools.ietf.org/html/rfc4180) -// Note that we allow leading and trailing spaces with int or float field. -// -// Arguments: -// records: Each string is a record/row in the csv and all records should have -// the same format. -// record_defaults: One tensor per column of the input record, with either a -// scalar default value for that column or an empty vector if the column is -// required. -// -// Returns Each tensor will have the same shape as records. -func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "DecodeCSV", - Input: []tf.Input{ - records, tf.OutputList(record_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("DecodeCSV", err) - return - } - return output -} - -// Convert JSON-encoded Example records to binary protocol buffer strings. -// -// This op translates a tensor containing Example records, encoded using -// the [standard JSON -// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json), -// into a tensor containing the same records encoded as binary protocol -// buffers. The resulting tensor can then be fed to any of the other -// Example-parsing ops. -// -// Arguments: -// json_examples: Each string is a JSON object serialized according to the JSON -// mapping of the Example proto. -// -// Returns Each string is a binary Example protocol buffer corresponding -// to the respective element of `json_examples`. 
-func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DecodeJSONExample", - Input: []tf.Input{ - json_examples, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Transforms a Tensor into a serialized TensorProto proto. // // Arguments: @@ -10868,21 +9858,6 @@ func SerializeTensor(scope *Scope, tensor tf.Output) (serialized tf.Output) { return op.Output(0) } -// Computes acos of x element-wise. -func Acos(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Acos", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // UnbatchGradAttr is an optional argument to UnbatchGrad. type UnbatchGradAttr func(optionalAttr) @@ -11149,409 +10124,183 @@ func ParseSingleSequenceExample(scope *Scope, serialized tf.Output, feature_list return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values } -// QuantizeAndDequantizeAttr is an optional argument to QuantizeAndDequantize. -type QuantizeAndDequantizeAttr func(optionalAttr) +// SparseToDenseAttr is an optional argument to SparseToDense. +type SparseToDenseAttr func(optionalAttr) -// QuantizeAndDequantizeSignedInput sets the optional signed_input attribute to value. +// SparseToDenseValidateIndices sets the optional validate_indices attribute to value. +// +// value: If true, indices are checked to make sure they are sorted in +// lexicographic order and that there are no repeats. // If not specified, defaults to true -func QuantizeAndDequantizeSignedInput(value bool) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeNumBits sets the optional num_bits attribute to value. 
-// If not specified, defaults to 8 -func QuantizeAndDequantizeNumBits(value int64) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// QuantizeAndDequantizeRangeGiven sets the optional range_given attribute to value. -// If not specified, defaults to false -func QuantizeAndDequantizeRangeGiven(value bool) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["range_given"] = value - } -} - -// QuantizeAndDequantizeInputMin sets the optional input_min attribute to value. -// If not specified, defaults to 0 -func QuantizeAndDequantizeInputMin(value float32) QuantizeAndDequantizeAttr { - return func(m optionalAttr) { - m["input_min"] = value - } -} - -// QuantizeAndDequantizeInputMax sets the optional input_max attribute to value. -// If not specified, defaults to 0 -func QuantizeAndDequantizeInputMax(value float32) QuantizeAndDequantizeAttr { +func SparseToDenseValidateIndices(value bool) SparseToDenseAttr { return func(m optionalAttr) { - m["input_max"] = value + m["validate_indices"] = value } } -// Use QuantizeAndDequantizeV2 instead. +// Converts a sparse representation into a dense tensor. // -// DEPRECATED at GraphDef version 22: Replaced by QuantizeAndDequantizeV2 -func QuantizeAndDequantize(scope *Scope, input tf.Output, optional ...QuantizeAndDequantizeAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "QuantizeAndDequantize", - Input: []tf.Input{ - input, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns locations of nonzero / true values in a tensor. +// Builds an array `dense` with shape `output_shape` such that // -// This operation returns the coordinates of true elements in `condition`. 
The -// coordinates are returned in a 2-D tensor where the first dimension (rows) -// represents the number of true elements, and the second dimension (columns) -// represents the coordinates of the true elements. Keep in mind, the shape of -// the output tensor can vary depending on how many true values there are in -// `condition`. Indices are output in row-major order. +// ``` +// # If sparse_indices is scalar +// dense[i] = (i == sparse_indices ? sparse_values : default_value) // -// For example: +// # If sparse_indices is a vector, then for each i +// dense[sparse_indices[i]] = sparse_values[i] // +// # If sparse_indices is an n by d matrix, then for each i in [0, n) +// dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i] // ``` -// # 'input' tensor is [[True, False] -// # [True, False]] -// # 'input' has two true values, so output has two coordinates. -// # 'input' has rank of 2, so coordinates have two indices. -// where(input) ==> [[0, 0], -// [1, 0]] // -// # `condition` tensor is [[[True, False] -// # [True, False]] -// # [[False, True] -// # [False, True]] -// # [[False, False] -// # [False, True]]] -// # 'input' has 5 true values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] +// All other values in `dense` are set to `default_value`. If `sparse_values` is a +// scalar, all sparse indices are set to this single value. // -// # `condition` tensor is [[[1.5, 0.0] -// # [-0.5, 0.0]] -// # [[0.0, 0.25] -// # [0.0, 0.75]] -// # [[0.0, 0.0] -// # [0.0, 0.01]]] -// # 'input' has 5 nonzero values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] +// Indices should be sorted in lexicographic order, and indices must not +// contain any repeats. 
If `validate_indices` is true, these properties +// are checked during execution. // -// # `condition` tensor is [[[1.5 + 0.0j, 0.0 + 0.0j] -// # [0.0 + 0.5j, 0.0 + 0.0j]] -// # [[0.0 + 0.0j, 0.25 + 1.5j] -// # [0.0 + 0.0j, 0.75 + 0.0j]] -// # [[0.0 + 0.0j, 0.0 + 0.0j] -// # [0.0 + 0.0j, 0.01 + 0.0j]]] -// # 'input' has 5 nonzero magnitude values, so output has 5 coordinates. -// # 'input' has rank of 3, so coordinates have three indices. -// where(input) ==> [[0, 0, 0], -// [0, 1, 0], -// [1, 0, 1], -// [1, 1, 1], -// [2, 1, 1]] -// ``` -func Where(scope *Scope, condition tf.Output) (index tf.Output) { +// Arguments: +// sparse_indices: 0-D, 1-D, or 2-D. `sparse_indices[i]` contains the complete +// index where `sparse_values[i]` will be placed. +// output_shape: 1-D. Shape of the dense output tensor. +// sparse_values: 1-D. Values corresponding to each row of `sparse_indices`, +// or a scalar value to be used for all sparse indices. +// default_value: Scalar value to set for indices not specified in +// `sparse_indices`. +// +// Returns Dense output tensor of shape `output_shape`. +func SparseToDense(scope *Scope, sparse_indices tf.Output, output_shape tf.Output, sparse_values tf.Output, default_value tf.Output, optional ...SparseToDenseAttr) (dense tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Where", + Type: "SparseToDense", Input: []tf.Input{ - condition, + sparse_indices, output_shape, sparse_values, default_value, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QueueDequeueV2Attr is an optional argument to QueueDequeueV2. -type QueueDequeueV2Attr func(optionalAttr) +// PreventGradientAttr is an optional argument to PreventGradient. +type PreventGradientAttr func(optionalAttr) -// QueueDequeueV2TimeoutMs sets the optional timeout_ms attribute to value. 
+// PreventGradientMessage sets the optional message attribute to value. // -// value: If the queue is empty, this operation will block for up to -// timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueDequeueV2TimeoutMs(value int64) QueueDequeueV2Attr { +// value: Will be printed in the error when anyone tries to differentiate +// this operation. +// If not specified, defaults to "" +func PreventGradientMessage(value string) PreventGradientAttr { return func(m optionalAttr) { - m["timeout_ms"] = value + m["message"] = value } } -// Dequeues a tuple of one or more tensors from the given queue. +// An identity op that triggers an error if a gradient is requested. // -// This operation has k outputs, where k is the number of components -// in the tuples stored in the given queue, and output i is the ith -// component of the dequeued tuple. +// When executed in a graph, this op outputs its input tensor as-is. // -// N.B. If the queue is empty, this operation will block until an element -// has been dequeued (or 'timeout_ms' elapses, if specified). +// When building ops to compute gradients, the TensorFlow gradient system +// will return an error when trying to lookup the gradient of this op, +// because no gradient must ever be registered for this function. This +// op exists to prevent subtle bugs from silently returning unimplemented +// gradients in some corner cases. // // Arguments: -// handle: The handle to a queue. -// component_types: The type of each component in a tuple. +// input: any tensor. // -// Returns One or more tensors that were dequeued as a tuple. -func QueueDequeueV2(scope *Scope, handle tf.Output, component_types []tf.DataType, optional ...QueueDequeueV2Attr) (components []tf.Output) { +// Returns the same input tensor. 
+func PreventGradient(scope *Scope, input tf.Output, optional ...PreventGradientAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"component_types": component_types} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QueueDequeueV2", + Type: "PreventGradient", Input: []tf.Input{ - handle, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueV2", err) - return - } - return components -} - -// ParseSequenceExampleAttr is an optional argument to ParseSequenceExample. -type ParseSequenceExampleAttr func(optionalAttr) - -// ParseSequenceExampleNcontextSparse sets the optional Ncontext_sparse attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleNcontextSparse(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Ncontext_sparse"] = value - } -} - -// ParseSequenceExampleNcontextDense sets the optional Ncontext_dense attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleNcontextDense(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Ncontext_dense"] = value - } + return op.Output(0) } -// ParseSequenceExampleNfeatureListSparse sets the optional Nfeature_list_sparse attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleNfeatureListSparse(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Nfeature_list_sparse"] = value +// Computes asin of x element-wise. +func Asin(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return } -} - -// ParseSequenceExampleNfeatureListDense sets the optional Nfeature_list_dense attribute to value. 
-// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func ParseSequenceExampleNfeatureListDense(value int64) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["Nfeature_list_dense"] = value + opspec := tf.OpSpec{ + Type: "Asin", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// ParseSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value. -// -// value: A list of Ncontext_sparse types; the data types of data in -// each context Feature given in context_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> +// Computes the sum along sparse segments of a tensor. // -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleContextSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["context_sparse_types"] = value - } -} - -// ParseSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. -// If not specified, defaults to <> +// Like `SparseSegmentSum`, but allows missing ids in `segment_ids`. If an id is +// misisng, the `output` tensor at that position will be zeroed. // -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_dense_types"] = value - } -} - -// ParseSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value. +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/sparse#Segmentation) +// for an explanation of segments. // -// value: A list of Ncontext_dense shapes; the shapes of data in -// each context Feature given in context_dense_keys. 
-// The number of elements in the Feature corresponding to context_dense_key[j] -// must always equal context_dense_shapes[j].NumEntries(). -// The shape of context_dense_values[j] will match context_dense_shapes[j]. -// If not specified, defaults to <> +// For example: // -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleContextDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["context_dense_shapes"] = value - } -} - -// ParseSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. +// ```python +// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) // -// value: A list of Nfeature_list_sparse types; the data types -// of data in each FeatureList given in feature_list_sparse_keys. -// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// If not specified, defaults to <> +// tf.sparse_segment_sum_with_num_segments( +// c, tf.constant([0, 1]), tf.constant([0, 0]), num_segments=3) +// # => [[0 0 0 0] +// # [0 0 0 0] +// # [0 0 0 0]] // -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_sparse_types"] = value - } -} - -// ParseSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. +// tf.sparse_segment_sum_with_num_segments(c, +// tf.constant([0, 1]), +// tf.constant([0, 2], +// num_segments=4)) +// # => [[ 1 2 3 4] +// # [ 0 0 0 0] +// # [-1 -2 -3 -4] +// # [ 0 0 0 0]] +// ``` // -// value: A list of Nfeature_list_dense shapes; the shapes of -// data in each FeatureList given in feature_list_dense_keys. -// The shape of each Feature in the FeatureList corresponding to -// feature_list_dense_key[j] must always equal -// feature_list_dense_shapes[j].NumEntries(). 
-// If not specified, defaults to <> +// Arguments: // -// REQUIRES: len(value) >= 0 -func ParseSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { - return func(m optionalAttr) { - m["feature_list_dense_shapes"] = value - } -} - -// Transforms a vector of brain.SequenceExample protos (as strings) into typed tensors. +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// num_segments: Should equal the number of distinct segment IDs. // -// Arguments: -// serialized: A vector containing binary serialized SequenceExample protos. -// debug_name: A vector containing the names of the serialized protos. -// May contain, for example, table key (descriptive) name for the -// corresponding serialized proto. This is purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no name is available. -// context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). -// context_dense_defaults[j] provides default values -// when the SequenceExample's context map lacks context_dense_key[j]. -// If an empty Tensor is provided for context_dense_defaults[j], -// then the Feature context_dense_keys[j] is required. -// The input type is inferred from context_dense_defaults[j], even when it's -// empty. If context_dense_defaults[j] is not empty, its shape must match -// context_dense_shapes[j]. -// feature_list_dense_missing_assumed_empty: A vector listing the -// FeatureList keys which may be missing from the SequenceExamples. If the -// associated FeatureList is missing, it is treated as empty. By default, -// any FeatureList not listed in this vector must exist in the SequenceExamples. -// context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars). -// The keys expected in the Examples' features associated with context_sparse -// values. 
-// context_dense_keys: A list of Ncontext_dense string Tensors (scalars). -// The keys expected in the SequenceExamples' context features associated with -// dense values. -// feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors -// (scalars). The keys expected in the FeatureLists associated with sparse -// values. -// feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars). -// The keys expected in the SequenceExamples' feature_lists associated -// with lists of dense values. -func ParseSequenceExample(scope *Scope, serialized tf.Output, debug_name tf.Output, context_dense_defaults []tf.Output, feature_list_dense_missing_assumed_empty []string, context_sparse_keys []string, context_dense_keys []string, feature_list_sparse_keys []string, feature_list_dense_keys []string, optional ...ParseSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output) { +// Returns Has same shape as data, except for dimension 0 which +// has size `num_segments`. 
+func SparseSegmentSumWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"feature_list_dense_missing_assumed_empty": feature_list_dense_missing_assumed_empty, "context_sparse_keys": context_sparse_keys, "context_dense_keys": context_dense_keys, "feature_list_sparse_keys": feature_list_sparse_keys, "feature_list_dense_keys": feature_list_dense_keys} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ParseSequenceExample", + Type: "SparseSegmentSumWithNumSegments", Input: []tf.Input{ - serialized, debug_name, tf.OutputList(context_dense_defaults), + data, indices, segment_ids, num_segments, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { 
- scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - if feature_list_dense_lengths, idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil { - scope.UpdateErr("ParseSequenceExample", err) - return - } - return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths + return op.Output(0) } // SparseReduceMaxAttr is an optional argument to SparseReduceMax. @@ -11609,147 +10358,149 @@ func SparseReduceMax(scope *Scope, input_indices tf.Output, input_values tf.Outp return op.Output(0) } -// Computes the Gauss error function of `x` element-wise. -func Erf(scope *Scope, x tf.Output) (y tf.Output) { +// DecodeRawAttr is an optional argument to DecodeRaw. +type DecodeRawAttr func(optionalAttr) + +// DecodeRawLittleEndian sets the optional little_endian attribute to value. +// +// value: Whether the input `bytes` are in little-endian order. +// Ignored for `out_type` values that are stored in a single byte like +// `uint8`. +// If not specified, defaults to true +func DecodeRawLittleEndian(value bool) DecodeRawAttr { + return func(m optionalAttr) { + m["little_endian"] = value + } +} + +// Reinterpret the bytes of a string as a vector of numbers. +// +// Arguments: +// bytes: All the elements must have the same length. +// +// +// Returns A Tensor with one more dimension than the input `bytes`. The +// added dimension will have size equal to the length of the elements +// of `bytes` divided by the number of bytes to represent `out_type`. 
+func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"out_type": out_type} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Erf", + Type: "DecodeRaw", Input: []tf.Input{ - x, + bytes, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns element-wise largest integer not greater than x. -func Floor(scope *Scope, x tf.Output) (y tf.Output) { +// RetrieveTPUEmbeddingADAMParametersAttr is an optional argument to RetrieveTPUEmbeddingADAMParameters. +type RetrieveTPUEmbeddingADAMParametersAttr func(optionalAttr) + +// RetrieveTPUEmbeddingADAMParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingADAMParametersTableId(value int64) RetrieveTPUEmbeddingADAMParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// RetrieveTPUEmbeddingADAMParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingADAMParametersTableName(value string) RetrieveTPUEmbeddingADAMParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Retrieve ADAM embedding parameters. +// +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns Parameter parameters updated by the ADAM optimization algorithm.Parameter momenta updated by the ADAM optimization algorithm.Parameter velocities updated by the ADAM optimization algorithm. 
+func RetrieveTPUEmbeddingADAMParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingADAMParametersAttr) (parameters tf.Output, momenta tf.Output, velocities tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Floor", - Input: []tf.Input{ - x, - }, + Type: "RetrieveTPUEmbeddingADAMParameters", + + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// OneHotAttr is an optional argument to OneHot. -type OneHotAttr func(optionalAttr) +// FusedBatchNormAttr is an optional argument to FusedBatchNorm. +type FusedBatchNormAttr func(optionalAttr) -// OneHotAxis sets the optional axis attribute to value. +// FusedBatchNormEpsilon sets the optional epsilon attribute to value. // -// value: The axis to fill (default: -1, a new inner-most axis). -// If not specified, defaults to -1 -func OneHotAxis(value int64) OneHotAttr { +// value: A small float number added to the variance of x. +// If not specified, defaults to 0.0001 +func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr { return func(m optionalAttr) { - m["axis"] = value + m["epsilon"] = value } } -// Returns a one-hot tensor. -// -// The locations represented by indices in `indices` take value `on_value`, -// while all other locations take value `off_value`. -// -// If the input `indices` is rank `N`, the output will have rank `N+1`, -// The new axis is created at dimension `axis` (default: the new axis is -// appended at the end). +// FusedBatchNormDataFormat sets the optional data_format attribute to value. // -// If `indices` is a scalar the output shape will be a vector of length `depth`. +// value: The data format for x and y. Either "NHWC" (default) or "NCHW". 
+// If not specified, defaults to "NHWC" +func FusedBatchNormDataFormat(value string) FusedBatchNormAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// FusedBatchNormIsTraining sets the optional is_training attribute to value. // -// If `indices` is a vector of length `features`, the output shape will be: -// ``` -// features x depth if axis == -1 -// depth x features if axis == 0 -// ``` +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// Batch normalization. // -// If `indices` is a matrix (batch) with shape `[batch, features]`, -// the output shape will be: -// ``` -// batch x features x depth if axis == -1 -// batch x depth x features if axis == 1 -// depth x batch x features if axis == 0 -// ``` +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. // +// Arguments: +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// offset: A 1D Tensor for offset, to shift to the normalized x. +// mean: A 1D Tensor for population mean. Used for inference only; +// must be empty for training. +// variance: A 1D Tensor for population variance. Used for inference only; +// must be empty for training. 
// -// Examples -// ========= -// -// Suppose that -// ``` -// indices = [0, 2, -1, 1] -// depth = 3 -// on_value = 5.0 -// off_value = 0.0 -// axis = -1 -// ``` -// -// Then output is `[4 x 3]`: -// ``` -// output = -// [5.0 0.0 0.0] // one_hot(0) -// [0.0 0.0 5.0] // one_hot(2) -// [0.0 0.0 0.0] // one_hot(-1) -// [0.0 5.0 0.0] // one_hot(1) -// ``` -// -// Suppose that -// ``` -// indices = [0, 2, -1, 1] -// depth = 3 -// on_value = 0.0 -// off_value = 3.0 -// axis = 0 -// ``` -// -// Then output is `[3 x 4]`: -// ``` -// output = -// [0.0 3.0 3.0 3.0] -// [3.0 3.0 3.0 0.0] -// [3.0 3.0 3.0 3.0] -// [3.0 0.0 3.0 3.0] -// // ^ one_hot(0) -// // ^ one_hot(2) -// // ^ one_hot(-1) -// // ^ one_hot(1) -// ``` -// -// Suppose that -// ``` -// indices = [[0, 2], [1, -1]] -// depth = 3 -// on_value = 1.0 -// off_value = 0.0 -// axis = -1 -// ``` -// -// Then output is `[2 x 2 x 3]`: -// ``` -// output = -// [ -// [1.0, 0.0, 0.0] // one_hot(0) -// [0.0, 0.0, 1.0] // one_hot(2) -// ][ -// [0.0, 1.0, 0.0] // one_hot(1) -// [0.0, 0.0, 0.0] // one_hot(-1) -// ] -// ``` -// -// Arguments: -// indices: A tensor of indices. -// depth: A scalar defining the depth of the one hot dimension. -// on_value: A scalar defining the value to fill in output when `indices[j] = i`. -// off_value: A scalar defining the value to fill in output when `indices[j] != i`. -// -// Returns The one-hot tensor. 
-func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output, off_value tf.Output, optional ...OneHotAttr) (output tf.Output) { +// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow +// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by +// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused +// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance +// in the cuDNN case), to be reused in the gradient computation. +func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { if scope.Err() != nil { return } @@ -11758,166 +10509,174 @@ func OneHot(scope *Scope, indices tf.Output, depth tf.Output, on_value tf.Output a(attrs) } opspec := tf.OpSpec{ - Type: "OneHot", + Type: "FusedBatchNorm", Input: []tf.Input{ - indices, depth, on_value, off_value, + x, scale, offset, mean, variance, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// CudnnRNNAttr is an optional argument to CudnnRNN. -type CudnnRNNAttr func(optionalAttr) +// RandomStandardNormalAttr is an optional argument to RandomStandardNormal. +type RandomStandardNormalAttr func(optionalAttr) -// CudnnRNNRnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNRnnMode(value string) CudnnRNNAttr { +// RandomStandardNormalSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. 
+// If not specified, defaults to 0 +func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr { return func(m optionalAttr) { - m["rnn_mode"] = value + m["seed"] = value } } -// CudnnRNNInputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNInputMode(value string) CudnnRNNAttr { +// RandomStandardNormalSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr { return func(m optionalAttr) { - m["input_mode"] = value + m["seed2"] = value } } -// CudnnRNNDirection sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNDirection(value string) CudnnRNNAttr { - return func(m optionalAttr) { - m["direction"] = value +// Outputs random values from a normal distribution. +// +// The generated values will have mean 0 and standard deviation 1. +// +// Arguments: +// shape: The shape of the output tensor. +// dtype: The type of the output. +// +// Returns A tensor of the specified shape filled with random normal values. +func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) { + if scope.Err() != nil { + return } -} - -// CudnnRNNDropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNDropout(value float32) CudnnRNNAttr { - return func(m optionalAttr) { - m["dropout"] = value + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) } -} - -// CudnnRNNSeed sets the optional seed attribute to value. 
-// If not specified, defaults to 0 -func CudnnRNNSeed(value int64) CudnnRNNAttr { - return func(m optionalAttr) { - m["seed"] = value + opspec := tf.OpSpec{ + Type: "RandomStandardNormal", + Input: []tf.Input{ + shape, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// CudnnRNNSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNSeed2(value int64) CudnnRNNAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} +// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. +type FusedResizeAndPadConv2DAttr func(optionalAttr) -// CudnnRNNIsTraining sets the optional is_training attribute to value. -// If not specified, defaults to true -func CudnnRNNIsTraining(value bool) CudnnRNNAttr { +// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr { return func(m optionalAttr) { - m["is_training"] = value + m["resize_align_corners"] = value } } -// A RNN backed by cuDNN. +// Performs a resize and padding as a preprocess during a convolution. // -// Computes the RNN from the input and initial states, with respect to the params -// buffer. +// It's often possible to do spatial transformations more efficiently as part of +// the packing stage of a convolution, so this op allows for an optimized +// implementation where these stages are fused together. This prevents the need to +// write out the intermediate results as whole tensors, reducing memory pressure, +// and we can get some latency gains by merging the transformation calculations. 
+// The data_format attribute for Conv2D isn't supported by this op, and defaults to +// 'NHWC' order. +// Internally this op uses a single per-graph scratch buffer, which means that it +// will block if multiple versions are being run in parallel. This is because this +// operator is primarily an optimization to minimize memory usage. // -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// the actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. Should be -// "unidirectional" or "bidirectional". -// dropout: Dropout probability. When set to 0., dropout is disabled. -// seed: The 1st part of a seed to initialize dropout. -// seed2: The 2nd part of a seed to initialize dropout. -// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size]. -// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size, -// num_units]. -// input_c: For LSTM, a 3-D tensor with the shape of -// [num_layer * dir, batch, num_units]. For other models, it is ignored. -// params: A 1-D tensor that contains the weights and biases in an opaque layout. -// The size must be created through CudnnRNNParamsSize, and initialized -// separately. Note that they might not be compatible across different -// generations. So it is a good idea to save and restore -// output: A 3-D tensor with the shape of [seq_length, batch_size, -// dir * num_units]. -// output_h: The same shape has input_h. -// output_c: The same shape as input_c for LSTM. An empty tensor for other models. -// is_training: Indicates whether this operation is used for inferenece or -// training. -// reserve_space: An opaque tensor that can be used in backprop calculation. 
It -// is only produced if is_training is false. -func CudnnRNN(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, optional ...CudnnRNNAttr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output) { +// Arguments: +// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. +// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. +// paddings: A two-column matrix specifying the padding sizes. The number of +// rows must be the same as the rank of `input`. +// filter: 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. +// +// strides: 1-D of length 4. The stride of the sliding window for each dimension +// of `input`. Must be in the same order as the dimension specified with format. +// padding: The type of padding algorithm to use. +func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "CudnnRNN", + Type: "FusedResizeAndPadConv2D", Input: []tf.Input{ - input, input_h, input_c, params, + input, size, paddings, filter, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) + return op.Output(0) } -// DecodeCompressedAttr is an optional argument to DecodeCompressed. -type DecodeCompressedAttr func(optionalAttr) +// RandomUniformAttr is an optional argument to RandomUniform. +type RandomUniformAttr func(optionalAttr) -// DecodeCompressedCompressionType sets the optional compression_type attribute to value. +// RandomUniformSeed sets the optional seed attribute to value. 
// -// value: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// If not specified, defaults to "" -func DecodeCompressedCompressionType(value string) DecodeCompressedAttr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomUniformSeed(value int64) RandomUniformAttr { return func(m optionalAttr) { - m["compression_type"] = value + m["seed"] = value } } -// Decompress strings. +// RandomUniformSeed2 sets the optional seed2 attribute to value. // -// This op decompresses each element of the `bytes` input `Tensor`, which -// is assumed to be compressed using the given `compression_type`. +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomUniformSeed2(value int64) RandomUniformAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from a uniform distribution. // -// The `output` is a string `Tensor` of the same shape as `bytes`, -// each element containing the decompressed data from the corresponding -// element in `bytes`. +// The generated values follow a uniform distribution in the range `[0, 1)`. The +// lower bound 0 is included in the range, while the upper bound 1 is excluded. // // Arguments: -// bytes: A Tensor of string which is compressed. +// shape: The shape of the output tensor. +// dtype: The type of the output. // -// Returns A Tensor with the same shape as input `bytes`, uncompressed -// from bytes. -func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompressedAttr) (output tf.Output) { +// Returns A tensor of the specified shape filled with uniform random values. 
+func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeCompressed", + Type: "RandomUniform", Input: []tf.Input{ - bytes, + shape, }, Attrs: attrs, } @@ -11925,115 +10684,186 @@ func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompresse return op.Output(0) } -// DecodeRawAttr is an optional argument to DecodeRaw. -type DecodeRawAttr func(optionalAttr) +// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl. +type ResourceApplyFtrlAttr func(optionalAttr) -// DecodeRawLittleEndian sets the optional little_endian attribute to value. +// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value. // -// value: Whether the input `bytes` are in little-endian order. -// Ignored for `out_type` values that are stored in a single byte like -// `uint8`. -// If not specified, defaults to true -func DecodeRawLittleEndian(value bool) DecodeRawAttr { +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr { return func(m optionalAttr) { - m["little_endian"] = value + m["use_locking"] = value } } -// Reinterpret the bytes of a string as a vector of numbers. +// Update '*var' according to the Ftrl-proximal scheme. // -// Arguments: -// bytes: All the elements must have the same length. 
+// accum_new = accum + grad * grad +// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new // +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regulariation. Must be a scalar. +// l2: L2 regulariation. Must be a scalar. +// lr_power: Scaling factor. Must be a scalar. // -// Returns A Tensor with one more dimension than the input `bytes`. The -// added dimension will have size equal to the length of the elements -// of `bytes` divided by the number of bytes to represent `out_type`. -func DecodeRaw(scope *Scope, bytes tf.Output, out_type tf.DataType, optional ...DecodeRawAttr) (output tf.Output) { +// Returns the created operation. +func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeRaw", + Type: "ResourceApplyFtrl", Input: []tf.Input{ - bytes, + var_, accum, linear, grad, lr, l1, l2, lr_power, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes natural logarithm of (1 + x) element-wise. +// Transforms a vector of brain.Example protos (as strings) into typed tensors. // -// I.e., \\(y = \log_e (1 + x)\\). -func Log1p(scope *Scope, x tf.Output) (y tf.Output) { +// Arguments: +// serialized: A vector containing a batch of binary serialized Example protos. 
+// names: A vector containing the names of the serialized protos. +// May contain, for example, table key (descriptive) names for the +// corresponding serialized protos. These are purely useful for debugging +// purposes, and the presence of values here has no effect on the output. +// May also be an empty vector if no names are available. +// If non-empty, this vector must be the same length as "serialized". +// sparse_keys: A list of Nsparse string Tensors (scalars). +// The keys expected in the Examples' features associated with sparse values. +// dense_keys: A list of Ndense string Tensors (scalars). +// The keys expected in the Examples' features associated with dense values. +// dense_defaults: A list of Ndense Tensors (some may be empty). +// dense_defaults[j] provides default values +// when the example's feature_map lacks dense_key[j]. If an empty Tensor is +// provided for dense_defaults[j], then the Feature dense_keys[j] is required. +// The input type is inferred from dense_defaults[j], even when it's empty. +// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, +// then the shape of dense_defaults[j] must match that of dense_shapes[j]. +// If dense_shapes[j] has an undefined major dimension (variable strides dense +// feature), dense_defaults[j] must contain a single element: +// the padding element. +// sparse_types: A list of Nsparse types; the data types of data in each Feature +// given in sparse_keys. +// Currently the ParseExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// dense_shapes: A list of Ndense shapes; the shapes of data in each Feature +// given in dense_keys. +// The number of elements in the Feature corresponding to dense_key[j] +// must always equal dense_shapes[j].NumEntries(). 
+// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output +// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): +// The dense outputs are just the inputs row-stacked by batch. +// This works for dense_shapes[j] = (-1, D1, ..., DN). In this case +// the shape of the output Tensor dense_values[j] will be +// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks +// of elements of length D1 * .... * DN, across all minibatch entries +// in the input. Any minibatch entry with less than M blocks of elements of +// length D1 * ... * DN will be padded with the corresponding default_value +// scalar element along the second dimension. +func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys []tf.Output, dense_keys []tf.Output, dense_defaults []tf.Output, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"sparse_types": sparse_types, "dense_shapes": dense_shapes} opspec := tf.OpSpec{ - Type: "Log1p", + Type: "ParseExample", Input: []tf.Input{ - x, + serialized, names, tf.OutputList(sparse_keys), tf.OutputList(dense_keys), tf.OutputList(dense_defaults), }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { + scope.UpdateErr("ParseExample", err) + return + } + if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { + scope.UpdateErr("ParseExample", err) + return + } + if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { + scope.UpdateErr("ParseExample", err) + return + } + if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { + scope.UpdateErr("ParseExample", err) + 
return + } + return sparse_indices, sparse_values, sparse_shapes, dense_values } -// Computes rectified linear 6 gradients for a Relu6 operation. +// Compute the pairwise cross product. +// +// `a` and `b` must be the same shape; they can either be simple 3-element vectors, +// or any shape where the innermost dimension is 3. In the latter case, each pair +// of corresponding 3-element vectors is cross-multiplied independently. // // Arguments: -// gradients: The backpropagated gradients to the corresponding Relu6 operation. -// features: The features passed as input to the corresponding Relu6 operation, or -// its output; using either one produces the same result. +// a: A tensor containing 3-element vectors. +// b: Another tensor, of same type and shape as `a`. // -// Returns The gradients: -// `gradients * (features > 0) * (features < 6)`. -func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { +// Returns Pairwise cross product of the vectors in `a` and `b`. +func Cross(scope *Scope, a tf.Output, b tf.Output) (product tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Relu6Grad", + Type: "Cross", Input: []tf.Input{ - gradients, features, + a, b, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResizeBicubicAttr is an optional argument to ResizeBicubic. -type ResizeBicubicAttr func(optionalAttr) +// StatefulStandardNormalV2Attr is an optional argument to StatefulStandardNormalV2. +type StatefulStandardNormalV2Attr func(optionalAttr) -// ResizeBicubicAlignCorners sets the optional align_corners attribute to value. +// StatefulStandardNormalV2Dtype sets the optional dtype attribute to value. // -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. 
-// If not specified, defaults to false -func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr { +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatefulStandardNormalV2Dtype(value tf.DataType) StatefulStandardNormalV2Attr { return func(m optionalAttr) { - m["align_corners"] = value + m["dtype"] = value } } -// Resize `images` to `size` using bicubic interpolation. +// Outputs random values from a normal distribution. // -// Input images can be of different types but output images are always float. +// The generated values will have mean 0 and standard deviation 1. // // Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. +// resource: The handle of the resource variable that stores the state of the RNG. +// algorithm: The RNG algorithm. +// shape: The shape of the output tensor. // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) { +// Returns A tensor of the specified shape filled with random normal values. +func StatefulStandardNormalV2(scope *Scope, resource tf.Output, algorithm tf.Output, shape tf.Output, optional ...StatefulStandardNormalV2Attr) (output tf.Output) { if scope.Err() != nil { return } @@ -12042,9 +10872,9 @@ func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...R a(attrs) } opspec := tf.OpSpec{ - Type: "ResizeBicubic", + Type: "StatefulStandardNormalV2", Input: []tf.Input{ - images, size, + resource, algorithm, shape, }, Attrs: attrs, } @@ -12052,133 +10882,201 @@ func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...R return op.Output(0) } -// Gather ragged slices from `params` axis `0` according to `indices`. +// Locks a mutex resource. The output is the lock. 
So long as the lock tensor // -// Outputs a `RaggedTensor` output composed from `output_dense_values` and -// `output_nested_splits`, such that: +// is alive, any other request to use `MutexLock` with this mutex will wait. +// +// This is particularly useful for creating a critical section when used in +// conjunction with `MutexLockIdentity`: // // ```python -// output.shape = indices.shape + params.shape[1:] -// output.ragged_rank = indices.shape.ndims + params.ragged_rank -// output[i...j, d0...dn] = params[indices[i...j], d0...dn] -// ``` // -// where +// mutex = mutex_v2( +// shared_name=handle_name, container=container, name=name) // -// * `params = -// ragged.from_nested_row_splits(params_dense_values, params_nested_splits)` -// provides the values that should be gathered. -// * `indices` ia a dense tensor with dtype `int32` or `int64`, indicating which -// values should be gathered. -// * `output = -// ragged.from_nested_row_splits(output_dense_values, output_nested_splits)` -// is the output tensor. +// def execute_in_critical_section(fn, *args, **kwargs): +// lock = gen_resource_variable_ops.mutex_lock(mutex) // -// (Note: This c++ op is used to implement the higher-level python -// `tf.ragged.gather` op, which also supports ragged indices.) +// with ops.control_dependencies([lock]): +// r = fn(*args, **kwargs) // +// with ops.control_dependencies(nest.flatten(r)): +// with ops.colocate_with(mutex): +// ensure_lock_exists = mutex_lock_identity(lock) // -// Arguments: -// params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the -// `params` RaggedTensor input. -// params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change -// at the python level from dense_values to flat_values, so dense_values is the -// deprecated name. -// indices: Indices in the outermost dimension of `params` of the values that should be -// gathered. 
-// OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain -// this number of `row_splits` tensors. This value should equal -// `indices.shape.ndims + params.ragged_rank - 1`. +// # Make sure that if any element of r is accessed, all of +// # them are executed together. +// r = nest.map_structure(tf.identity, r) // -// Returns The `nested_row_splits` tensors that define the row-partitioning for the -// returned RaggedTensor.The `flat_values` for the returned RaggedTensor. -func RaggedGather(scope *Scope, params_nested_splits []tf.Output, params_dense_values tf.Output, indices tf.Output, OUTPUT_RAGGED_RANK int64) (output_nested_splits []tf.Output, output_dense_values tf.Output) { - if scope.Err() != nil { - return +// with ops.control_dependencies([ensure_lock_exists]): +// return nest.map_structure(tf.identity, r) +// ``` +// +// While `fn` is running in the critical section, no other functions which wish to +// use this critical section may run. +// +// Often the use case is that two executions of the same graph, in parallel, +// wish to run `fn`; and we wish to ensure that only one of them executes +// at a time. This is especially important if `fn` modifies one or more +// variables at a time. +// +// It is also useful if two separate functions must share a resource, but we +// wish to ensure the usage is exclusive. +// +// Arguments: +// mutex: The mutex resource to lock. +// +// Returns A tensor that keeps a shared pointer to a lock on the mutex; +// when the Tensor is destroyed, the use count on the shared pointer is decreased +// by 1. When it reaches 0, the lock is released. 
+func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) { + if scope.Err() != nil { + return } - attrs := map[string]interface{}{"OUTPUT_RAGGED_RANK": OUTPUT_RAGGED_RANK} opspec := tf.OpSpec{ - Type: "RaggedGather", + Type: "MutexLock", Input: []tf.Input{ - tf.OutputList(params_nested_splits), params_dense_values, indices, + mutex, }, - Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Transforms a serialized tensorflow.TensorProto proto into a Tensor. +// +// Arguments: +// serialized: A scalar string containing a serialized TensorProto proto. +// out_type: The type of the serialized tensor. The provided type must match the +// type of the serialized tensor and no implicit conversion will take place. +// +// Returns A Tensor of type `out_type`. +func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - if output_nested_splits, idx, err = makeOutputList(op, idx, "output_nested_splits"); err != nil { - scope.UpdateErr("RaggedGather", err) - return + attrs := map[string]interface{}{"out_type": out_type} + opspec := tf.OpSpec{ + Type: "ParseTensor", + Input: []tf.Input{ + serialized, + }, + Attrs: attrs, } - output_dense_values = op.Output(idx) - return output_nested_splits, output_dense_values + op := scope.AddOperation(opspec) + return op.Output(0) } -// Greedily selects a subset of bounding boxes in descending order of score, +// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. +type MaxPoolWithArgmaxAttr func(optionalAttr) + +// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. +// If not specified, defaults to DT_INT64 +func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { + return func(m optionalAttr) { + m["Targmax"] = value + } +} + +// Performs max pooling on the input and outputs both max values and indices. 
// -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system. Note that this -// algorithm is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. +// The indices in `argmax` are flattened, so that a maximum value at position +// `[b, y, x, c]` becomes flattened index +// `((b * height + y) * width + x) * channels + c`. // -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. For example: +// The indices returned are always in `[0, height) x [0, width)` before flattening, +// even if padding is involved and the mathematically correct answer is outside +// (either negative or too large). This is a bug, but fixing it is difficult to do +// in a safe backwards compatible way, especially due to flattening. // -// selected_indices = tf.image.non_max_suppression_v2( -// boxes, scores, max_output_size, iou_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) +// Arguments: +// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns The max pooled output tensor.4-D. 
The flattened indices of the max values chosen for each output. +func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPoolWithArgmax", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// MaxPoolAttr is an optional argument to MaxPool. +type MaxPoolAttr func(optionalAttr) + +// MaxPoolDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolDataFormat(value string) MaxPoolAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Performs max pooling on the input. // // Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. +// input: 4-D input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. 
// -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) { +// Returns The max pooled output tensor. +func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "NonMaxSuppressionV2", + Type: "MaxPool", Input: []tf.Input{ - boxes, scores, max_output_size, iou_threshold, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Subtracts sparse updates from the variable referenced by `resource`. +// Multiplies sparse updates into the variable referenced by `resource`. // // This operation computes // // # Scalar indices -// ref[indices, ...] -= updates[...] +// ref[indices, ...] *= updates[...] // // # Vector indices (for each i) -// ref[indices[i], ...] -= updates[i, ...] +// ref[indices[i], ...] *= updates[i, ...] // // # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] -= updates[i, ..., j, ...] +// ref[indices[i, ..., j], ...] *= updates[i, ..., j, ...] // // Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions add. +// the same location, their contributions multiply. // // Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. // @@ -12192,12 +11090,12 @@ func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_ou // updates: A tensor of updated values to add to `ref`. // // Returns the created operation. 
-func ResourceScatterSub(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { +func ResourceScatterMul(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ResourceScatterSub", + Type: "ResourceScatterMul", Input: []tf.Input{ resource, indices, updates, }, @@ -12205,139 +11103,149 @@ func ResourceScatterSub(scope *Scope, resource tf.Output, indices tf.Output, upd return scope.AddOperation(opspec) } -// Converts a `RaggedTensor` into a `SparseTensor` with the same values. +// Subtracts sparse updates from the variable referenced by `resource`. // -// input=ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits) -// output=SparseTensor(indices=sparse_indices, values=sparse_values, -// dense_shape=sparse_dense_shape) +// This operation computes +// +// # Scalar indices +// ref[indices, ...] -= updates[...] +// +// # Vector indices (for each i) +// ref[indices[i], ...] -= updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] -= updates[i, ..., j, ...] +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions add. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
// // Arguments: -// rt_nested_splits: The `row_splits` for the `RaggedTensor`. -// rt_dense_values: The `flat_values` for the `RaggedTensor`. +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. // -// Returns The indices for the `SparseTensor`.The values of the `SparseTensor`.`sparse_dense_shape` is a tight bounding box of the input `RaggedTensor`. -func RaggedTensorToSparse(scope *Scope, rt_nested_splits []tf.Output, rt_dense_values tf.Output) (sparse_indices tf.Output, sparse_values tf.Output, sparse_dense_shape tf.Output) { +// Returns the created operation. +func ResourceScatterSub(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "RaggedTensorToSparse", + Type: "ResourceScatterSub", Input: []tf.Input{ - tf.OutputList(rt_nested_splits), rt_dense_values, + resource, indices, updates, }, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// Check if the input matches the regex pattern. +// Adds sparse updates to the variable referenced by `resource`. // -// The input is a string tensor of any shape. The pattern is a scalar -// string tensor which is applied to every element of the input tensor. -// The boolean values (True or False) of the output tensor indicate -// if the input matches the regex pattern provided. +// This operation computes // -// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) +// # Scalar indices +// ref[indices, ...] += updates[...] +// +// # Vector indices (for each i) +// ref[indices[i], ...] += updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] 
+// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions add. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
// // Arguments: -// input: A string tensor of the text to be processed. -// pattern: A scalar string tensor containing the regular expression to match the input. +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. // -// Returns A bool tensor with the same shape as `input`. -func RegexFullMatch(scope *Scope, input tf.Output, pattern tf.Output) (output tf.Output) { +// Returns the created operation. +func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "RegexFullMatch", + Type: "ResourceScatterAdd", Input: []tf.Input{ - input, pattern, + resource, indices, updates, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Says whether the targets are in the top `K` predictions. -// -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. -// -// More formally, let +// Reads the value of a variable. // -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, +// The tensor returned by this operation is immutable. 
// -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ +// The value returned by this operation is guaranteed to be influenced by all the +// writes on which this operation depends directly or indirectly, and to not be +// influenced by any of the writes which depend directly or indirectly on this +// operation. // // Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. -// -// Returns Computed precision at `k` as a `bool Tensor`. -func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { +// resource: handle to the resource in which to store the variable. +// dtype: the dtype of the value. +func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "InTopKV2", + Type: "ReadVariableOp", Input: []tf.Input{ - predictions, targets, k, + resource, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// RandomShuffleAttr is an optional argument to RandomShuffle. -type RandomShuffleAttr func(optionalAttr) - -// RandomShuffleSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomShuffleSeed(value int64) RandomShuffleAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} +// ResourceSparseApplyProximalAdagradAttr is an optional argument to ResourceSparseApplyProximalAdagrad. +type ResourceSparseApplyProximalAdagradAttr func(optionalAttr) -// RandomShuffleSeed2 sets the optional seed2 attribute to value. 
+// ResourceSparseApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomShuffleSeed2(value int64) RandomShuffleAttr { +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceSparseApplyProximalAdagradUseLocking(value bool) ResourceSparseApplyProximalAdagradAttr { return func(m optionalAttr) { - m["seed2"] = value + m["use_locking"] = value } } -// Randomly shuffles a tensor along its first dimension. -// -// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped -// to one and only one `output[i]`. For example, a mapping that might occur for a -// 3x2 tensor is: +// Sparse update entries in '*var' and '*accum' according to FOBOS algorithm. // -// ``` -// [[1, 2], [[5, 6], -// [3, 4], ==> [1, 2], -// [5, 6]] [3, 4]] -// ``` +// That is for rows we have grad for, we update var and accum as follows: +// accum += grad * grad +// prox_v = var +// prox_v -= lr * grad * (1 / sqrt(accum)) +// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} // // Arguments: -// value: The tensor to be shuffled. +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Learning rate. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. // -// Returns A tensor of same shape and type as `value`, shuffled along its first -// dimension. -func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { +// Returns the created operation. 
+func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalAdagradAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -12346,87 +11254,112 @@ func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "RandomShuffle", + Type: "ResourceSparseApplyProximalAdagrad", Input: []tf.Input{ - value, + var_, accum, lr, l1, l2, grad, indices, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes sigmoid of `x` element-wise. +// DecodeJpegAttr is an optional argument to DecodeJpeg. +type DecodeJpegAttr func(optionalAttr) + +// DecodeJpegChannels sets the optional channels attribute to value. // -// Specifically, `y = 1 / (1 + exp(-x))`. -func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return +// value: Number of color channels for the decoded image. +// If not specified, defaults to 0 +func DecodeJpegChannels(value int64) DecodeJpegAttr { + return func(m optionalAttr) { + m["channels"] = value } - opspec := tf.OpSpec{ - Type: "Sigmoid", - Input: []tf.Input{ - x, - }, +} + +// DecodeJpegRatio sets the optional ratio attribute to value. +// +// value: Downscaling ratio. +// If not specified, defaults to 1 +func DecodeJpegRatio(value int64) DecodeJpegAttr { + return func(m optionalAttr) { + m["ratio"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// FusedBatchNormAttr is an optional argument to FusedBatchNorm. -type FusedBatchNormAttr func(optionalAttr) +// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. +// +// value: If true use a slower but nicer upscaling of the +// chroma planes (yuv420/422 only). 
+// If not specified, defaults to true +func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr { + return func(m optionalAttr) { + m["fancy_upscaling"] = value + } +} -// FusedBatchNormEpsilon sets the optional epsilon attribute to value. +// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. // -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormEpsilon(value float32) FusedBatchNormAttr { +// value: If true try to recover an image from truncated input. +// If not specified, defaults to false +func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr { return func(m optionalAttr) { - m["epsilon"] = value + m["try_recover_truncated"] = value } } -// FusedBatchNormDataFormat sets the optional data_format attribute to value. +// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. // -// value: The data format for x and y. Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormDataFormat(value string) FusedBatchNormAttr { +// value: The minimum required fraction of lines before a truncated +// input is accepted. +// If not specified, defaults to 1 +func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr { return func(m optionalAttr) { - m["data_format"] = value + m["acceptable_fraction"] = value } } -// FusedBatchNormIsTraining sets the optional is_training attribute to value. +// DecodeJpegDctMethod sets the optional dct_method attribute to value. // -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormIsTraining(value bool) FusedBatchNormAttr { +// value: string specifying a hint about the algorithm used for +// decompression. Defaults to "" which maps to a system-specific +// default. Currently valid values are ["INTEGER_FAST", +// "INTEGER_ACCURATE"]. 
The hint may be ignored (e.g., the internal +// jpeg library changes to a version that does not have that specific +// option.) +// If not specified, defaults to "" +func DecodeJpegDctMethod(value string) DecodeJpegAttr { return func(m optionalAttr) { - m["is_training"] = value + m["dct_method"] = value } } -// Batch normalization. +// Decode a JPEG-encoded image to a uint8 tensor. // -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. +// The attr `channels` indicates the desired number of color channels for the +// decoded image. +// +// Accepted values are: +// +// * 0: Use the number of channels in the JPEG-encoded image. +// * 1: output a grayscale image. +// * 3: output an RGB image. +// +// If needed, the JPEG-encoded image is transformed to match the requested number +// of color channels. +// +// The attr `ratio` allows downscaling the image by an integer factor during +// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than +// downscaling the image later. +// +// +// This op also supports decoding PNGs and non-animated GIFs since the interface is +// the same, though it is cleaner to use `tf.image.decode_image`. // // Arguments: -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// offset: A 1D Tensor for offset, to shift to the normalized x. -// mean: A 1D Tensor for population mean. Used for inference only; -// must be empty for training. -// variance: A 1D Tensor for population variance. Used for inference only; -// must be empty for training. +// contents: 0-D. The JPEG-encoded image. 
// -// Returns A 4D Tensor for output data.A 1D Tensor for the computed batch mean, to be used by TensorFlow -// to compute the running mean.A 1D Tensor for the computed batch variance, to be used by -// TensorFlow to compute the running variance.A 1D Tensor for the computed batch mean, to be reused -// in the gradient computation.A 1D Tensor for the computed batch variance (inverted variance -// in the cuDNN case), to be reused in the gradient computation. -func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output, mean tf.Output, variance tf.Output, optional ...FusedBatchNormAttr) (y tf.Output, batch_mean tf.Output, batch_variance tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output) { +// Returns 3-D with shape `[height, width, channels]`.. +func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) { if scope.Err() != nil { return } @@ -12435,62 +11368,79 @@ func FusedBatchNorm(scope *Scope, x tf.Output, scale tf.Output, offset tf.Output a(attrs) } opspec := tf.OpSpec{ - Type: "FusedBatchNorm", + Type: "DecodeJpeg", Input: []tf.Input{ - x, scale, offset, mean, variance, + contents, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) + return op.Output(0) } -// RandomStandardNormalAttr is an optional argument to RandomStandardNormal. -type RandomStandardNormalAttr func(optionalAttr) +// DepthwiseConv2dNativeBackpropInputAttr is an optional argument to DepthwiseConv2dNativeBackpropInput. +type DepthwiseConv2dNativeBackpropInputAttr func(optionalAttr) -// RandomStandardNormalSeed sets the optional seed attribute to value. +// DepthwiseConv2dNativeBackpropInputDataFormat sets the optional data_format attribute to value. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. 
-// If not specified, defaults to 0 -func RandomStandardNormalSeed(value int64) RandomStandardNormalAttr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, height, width, channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, channels, height, width]. +// If not specified, defaults to "NHWC" +func DepthwiseConv2dNativeBackpropInputDataFormat(value string) DepthwiseConv2dNativeBackpropInputAttr { return func(m optionalAttr) { - m["seed"] = value + m["data_format"] = value } } -// RandomStandardNormalSeed2 sets the optional seed2 attribute to value. +// DepthwiseConv2dNativeBackpropInputDilations sets the optional dilations attribute to value. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomStandardNormalSeed2(value int64) RandomStandardNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Outputs random values from a normal distribution. -// -// The generated values will have mean 0 and standard deviation 1. +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. +// If not specified, defaults to +func DepthwiseConv2dNativeBackpropInputDilations(value []int64) DepthwiseConv2dNativeBackpropInputAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of depthwise convolution with respect to the input. // // Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. +// input_sizes: An integer vector representing the shape of `input`, based +// on `data_format`. 
For example, if `data_format` is 'NHWC' then +// `input` is a 4-D `[batch, height, width, channels]` tensor. +// filter: 4-D with shape +// `[filter_height, filter_width, in_channels, depthwise_multiplier]`. +// out_backprop: 4-D with shape based on `data_format`. +// For example, if `data_format` is 'NHWC' then +// out_backprop shape is `[batch, out_height, out_width, out_channels]`. +// Gradients w.r.t. the output of the convolution. +// strides: The stride of the sliding window for each dimension of the input +// of the convolution. +// padding: The type of padding algorithm to use. // -// Returns A tensor of the specified shape filled with random normal values. -func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomStandardNormalAttr) (output tf.Output) { +// Returns 4-D with shape according to `data_format`. For example, if +// `data_format` is 'NHWC', output shape is `[batch, in_height, +// in_width, in_channels]`. Gradient w.r.t. the input of the +// convolution. +func DepthwiseConv2dNativeBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropInputAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "RandomStandardNormal", + Type: "DepthwiseConv2dNativeBackpropInput", Input: []tf.Input{ - shape, + input_sizes, filter, out_backprop, }, Attrs: attrs, } @@ -12498,57 +11448,85 @@ func RandomStandardNormal(scope *Scope, shape tf.Output, dtype tf.DataType, opti return op.Output(0) } -// FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. -type FusedResizeAndPadConv2DAttr func(optionalAttr) +// EditDistanceAttr is an optional argument to EditDistance. 
+type EditDistanceAttr func(optionalAttr) -// FusedResizeAndPadConv2DResizeAlignCorners sets the optional resize_align_corners attribute to value. +// EditDistanceNormalize sets the optional normalize attribute to value. // -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func FusedResizeAndPadConv2DResizeAlignCorners(value bool) FusedResizeAndPadConv2DAttr { +// value: boolean (if true, edit distances are normalized by length of truth). +// +// The output is: +// If not specified, defaults to true +func EditDistanceNormalize(value bool) EditDistanceAttr { return func(m optionalAttr) { - m["resize_align_corners"] = value + m["normalize"] = value } } -// Performs a resize and padding as a preprocess during a convolution. +// Computes the (possibly normalized) Levenshtein Edit Distance. // -// It's often possible to do spatial transformations more efficiently as part of -// the packing stage of a convolution, so this op allows for an optimized -// implementation where these stages are fused together. This prevents the need to -// write out the intermediate results as whole tensors, reducing memory pressure, -// and we can get some latency gains by merging the transformation calculations. -// The data_format attribute for Conv2D isn't supported by this op, and defaults to -// 'NHWC' order. -// Internally this op uses a single per-graph scratch buffer, which means that it -// will block if multiple versions are being run in parallel. This is because this -// operator is primarily an optimization to minimize memory usage. +// The inputs are variable-length sequences provided by SparseTensors +// (hypothesis_indices, hypothesis_values, hypothesis_shape) +// and +// (truth_indices, truth_values, truth_shape). 
+// +// The inputs are: // // Arguments: -// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. -// size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. +// hypothesis_indices: The indices of the hypothesis list SparseTensor. +// This is an N x R int64 matrix. +// hypothesis_values: The values of the hypothesis list SparseTensor. +// This is an N-length vector. +// hypothesis_shape: The shape of the hypothesis list SparseTensor. +// This is an R-length vector. +// truth_indices: The indices of the truth list SparseTensor. +// This is an M x R int64 matrix. +// truth_values: The values of the truth list SparseTensor. +// This is an M-length vector. +// truth_shape: truth indices, vector. // -// strides: 1-D of length 4. The stride of the sliding window for each dimension -// of `input`. Must be in the same order as the dimension specified with format. -// padding: The type of padding algorithm to use. -func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string, optional ...FusedResizeAndPadConv2DAttr) (output tf.Output) { +// Returns A dense float tensor with rank R - 1. 
+// +// For the example input: +// +// // hypothesis represents a 2x1 matrix with variable-length values: +// // (0,0) = ["a"] +// // (1,0) = ["b"] +// hypothesis_indices = [[0, 0, 0], +// [1, 0, 0]] +// hypothesis_values = ["a", "b"] +// hypothesis_shape = [2, 1, 1] +// +// // truth represents a 2x2 matrix with variable-length values: +// // (0,0) = [] +// // (0,1) = ["a"] +// // (1,0) = ["b", "c"] +// // (1,1) = ["a"] +// truth_indices = [[0, 1, 0], +// [1, 0, 0], +// [1, 0, 1], +// [1, 1, 0]] +// truth_values = ["a", "b", "c", "a"] +// truth_shape = [2, 2, 2] +// normalize = true +// +// The output will be: +// +// // output is a 2x2 matrix with edit distances normalized by truth lengths. +// output = [[inf, 1.0], // (0,0): no truth, (0,1): no hypothesis +// [0.5, 1.0]] // (1,0): addition, (1,1): no hypothesis +func EditDistance(scope *Scope, hypothesis_indices tf.Output, hypothesis_values tf.Output, hypothesis_shape tf.Output, truth_indices tf.Output, truth_values tf.Output, truth_shape tf.Output, optional ...EditDistanceAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FusedResizeAndPadConv2D", + Type: "EditDistance", Input: []tf.Input{ - input, size, paddings, filter, + hypothesis_indices, hypothesis_values, hypothesis_shape, truth_indices, truth_values, truth_shape, }, Attrs: attrs, } @@ -12556,95 +11534,168 @@ func FusedResizeAndPadConv2D(scope *Scope, input tf.Output, size tf.Output, padd return op.Output(0) } -// RandomUniformAttr is an optional argument to RandomUniform. -type RandomUniformAttr func(optionalAttr) +// Returns 0 if x == 0, and x * log(y) otherwise, elementwise. 
+func Xlogy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Xlogy", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// RandomUniformSeed sets the optional seed attribute to value. +// Stops gradient computation. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomUniformSeed(value int64) RandomUniformAttr { - return func(m optionalAttr) { - m["seed"] = value +// When executed in a graph, this op outputs its input tensor as-is. +// +// When building ops to compute gradients, this op prevents the contribution of +// its inputs to be taken into account. Normally, the gradient generator adds ops +// to a graph to compute the derivatives of a specified 'loss' by recursively +// finding out inputs that contributed to its computation. If you insert this op +// in the graph it inputs are masked from the gradient generator. They are not +// taken into account for computing gradients. +// +// This is useful any time you want to compute a value with TensorFlow but need +// to pretend that the value was a constant. Some examples include: +// +// * The *EM* algorithm where the *M-step* should not involve backpropagation +// through the output of the *E-step*. +// * Contrastive divergence training of Boltzmann machines where, when +// differentiating the energy function, the training must not backpropagate +// through the graph that generated the samples from the model. +// * Adversarial training, where no backprop should happen through the adversarial +// example generation process. 
+func StopGradient(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return } + opspec := tf.OpSpec{ + Type: "StopGradient", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) } -// RandomUniformSeed2 sets the optional seed2 attribute to value. +// Eagerly executes a python function to compute func(input)->output. The // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomUniformSeed2(value int64) RandomUniformAttr { - return func(m optionalAttr) { - m["seed2"] = value +// semantics of the input, output, and attributes are the same as those for +// PyFunc. +func EagerPyFunc(scope *Scope, input []tf.Output, token string, Tout []tf.DataType) (output []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"token": token, "Tout": Tout} + opspec := tf.OpSpec{ + Type: "EagerPyFunc", + Input: []tf.Input{ + tf.OutputList(input), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("EagerPyFunc", err) + return + } + return output } -// Outputs random values from a uniform distribution. +// Concats all tensors in the list along the 0th dimension. // -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// Requires that all tensors have the same shape except the first dimension. // -// Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. +// input_handle: The input list. +// element_shape: The shape of the uninitialized elements in the list. If the first +// dimension is not -1, it is assumed that all list elements have the same +// leading dim. +// leading_dims: The list of leading dims of uninitialized list elements. 
Used if +// the leading dim of input_handle.element_shape or the element_shape input arg +// is not already set. +// tensor: The concated result. +// lengths: Output tensor containing sizes of the 0th dimension of tensors in the list, used for computing the gradient. // -// Returns A tensor of the specified shape filled with uniform random values. -func RandomUniform(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...RandomUniformAttr) (output tf.Output) { +func TensorListConcatV2(scope *Scope, input_handle tf.Output, element_shape tf.Output, leading_dims tf.Output, element_dtype tf.DataType) (tensor tf.Output, lengths tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"element_dtype": element_dtype} opspec := tf.OpSpec{ - Type: "RandomUniform", + Type: "TensorListConcatV2", Input: []tf.Input{ - shape, + input_handle, element_shape, leading_dims, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// ResourceApplyFtrlAttr is an optional argument to ResourceApplyFtrl. -type ResourceApplyFtrlAttr func(optionalAttr) +// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve. +type MatrixTriangularSolveAttr func(optionalAttr) -// ResourceApplyFtrlUseLocking sets the optional use_locking attribute to value. +// MatrixTriangularSolveLower sets the optional lower attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. +// value: Boolean indicating whether the innermost matrices in `matrix` are +// lower or upper triangular. 
+// If not specified, defaults to true +func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr { + return func(m optionalAttr) { + m["lower"] = value + } +} + +// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value. +// +// value: Boolean indicating whether to solve with `matrix` or its (block-wise) +// adjoint. +// +// @compatibility(numpy) +// Equivalent to scipy.linalg.solve_triangular +// @end_compatibility // If not specified, defaults to false -func ResourceApplyFtrlUseLocking(value bool) ResourceApplyFtrlAttr { +func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["adjoint"] = value } } -// Update '*var' according to the Ftrl-proximal scheme. +// Solves systems of linear equations with upper or lower triangular matrices by // -// accum_new = accum + grad * grad -// linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// backsubstitution. +// +// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form +// square matrices. If `lower` is `True` then the strictly upper triangular part +// of each inner-most matrix is assumed to be zero and not accessed. +// If `lower` is False then the strictly lower triangular part of each inner-most +// matrix is assumed to be zero and not accessed. +// `rhs` is a tensor of shape `[..., M, K]`. +// +// The output is a tensor of shape `[..., M, K]`. If `adjoint` is +// `True` then the innermost matrices in `output` satisfy matrix equations +// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +// If `adjoint` is `False` then the strictly then the innermost matrices in +// `output` satisfy matrix equations +// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`. 
// // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regulariation. Must be a scalar. -// l2: L2 regulariation. Must be a scalar. -// lr_power: Scaling factor. Must be a scalar. +// matrix: Shape is `[..., M, M]`. +// rhs: Shape is `[..., M, K]`. // -// Returns the created operation. -func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlAttr) (o *tf.Operation) { +// Returns Shape is `[..., M, K]`. +func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -12653,304 +11704,274 @@ func ResourceApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf. a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyFtrl", + Type: "MatrixTriangularSolve", Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, lr_power, + matrix, rhs, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes exponential of x element-wise. \\(y = e^x\\). -func Exp(scope *Scope, x tf.Output) (y tf.Output) { +// Saves tensors in V2 checkpoint format. +// +// By default, saves the named tensors in full. If the caller wishes to save +// specific slices of full tensors, "shape_and_slices" should be non-empty strings +// and correspondingly well-formed. +// +// Arguments: +// prefix: Must have a single element. The prefix of the V2 checkpoint to which we +// write the tensors. +// tensor_names: shape {N}. The names of the tensors to be saved. +// shape_and_slices: shape {N}. The slice specs of the tensors to be saved. +// Empty strings indicate that they are non-partitioned tensors. 
+// tensors: `N` tensors to save. +// +// Returns the created operation. +func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Exp", + Type: "SaveV2", Input: []tf.Input{ - x, + prefix, tensor_names, shape_and_slices, tf.OutputList(tensors), }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// NthElementAttr is an optional argument to NthElement. -type NthElementAttr func(optionalAttr) - -// NthElementReverse sets the optional reverse attribute to value. +// Concatenates quantized tensors along one dimension. // -// value: When set to True, find the nth-largest value in the vector and vice -// versa. -// If not specified, defaults to false -func NthElementReverse(value bool) NthElementAttr { - return func(m optionalAttr) { - m["reverse"] = value +// Arguments: +// concat_dim: 0-D. The dimension along which to concatenate. Must be in the +// range [0, rank(values)). +// values: The `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// input_mins: The minimum scalar values for each of the input tensors. +// input_maxes: The maximum scalar values for each of the input tensors. +// +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes.The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. 
+func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "QuantizedConcat", + Input: []tf.Input{ + concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes), + }, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// Finds values of the `n`-th order statistic for the last dimension. +// Slice a `SparseTensor` based on the `start` and `size`. // -// If the input is a vector (rank-1), finds the entries which is the nth-smallest -// value in the vector and outputs their values as scalar tensor. +// For example, if the input is // -// For matrices (resp. higher rank input), computes the entries which is the -// nth-smallest value in each row (resp. vector along the last dimension). Thus, +// input_tensor = shape = [2, 7] +// [ a d e ] +// [b c ] // -// values.shape = input.shape[:-1] +// Graphically the output tensors are: +// +// sparse_slice([0, 0], [2, 4]) = shape = [2, 4] +// [ a ] +// [b c ] +// +// sparse_slice([0, 4], [2, 3]) = shape = [2, 3] +// [ d e ] +// [ ] // // Arguments: -// input: 1-D or higher with last dimension at least `n+1`. -// n: 0-D. Position of sorted vector to select along the last dimension (along -// each row for matrices). Valid range of n is `[0, input.shape[:-1])` +// indices: 2-D tensor represents the indices of the sparse tensor. +// values: 1-D tensor represents the values of the sparse tensor. +// shape: 1-D. tensor represents the shape of the sparse tensor. +// start: 1-D. tensor represents the start of the slice. +// size: 1-D. tensor represents the size of the slice. +// output indices: A list of 1-D tensors represents the indices of the output +// sparse tensors. // -// Returns The `n`-th order statistic along each last dimensional slice. 
-func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) { +// Returns A list of 1-D tensors represents the values of the output sparse +// tensors.A list of 1-D tensors represents the shape of the output sparse +// tensors. +func SparseSlice(scope *Scope, indices tf.Output, values tf.Output, shape tf.Output, start tf.Output, size tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "NthElement", + Type: "SparseSlice", Input: []tf.Input{ - input, n, + indices, values, shape, start, size, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Computes the maximum along segments of a tensor. +// Runs multiple additive regression ensemble predictors on input instances and // -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. +// computes the logits. It is designed to be used during prediction. +// It traverses all the trees and calculates the final score for each instance. // -// This operator is similar to the unsorted segment sum operator found -// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). -// Instead of computing the sum over segments, it computes the maximum such that: +// Arguments: // -// \\(output_i = \max_{j...} data[j...]\\) where max is over tuples `j...` such -// that `segment_ids[j...] == i`. +// bucketized_features: A list of rank 1 Tensors containing bucket id for each +// feature. +// logits_dimension: scalar, dimension of the logits, to be used for partial logits +// shape. 
// -// If the maximum is empty for a given segment ID `i`, it outputs the smallest -// possible value for the specific numeric type, -// `output[i] = numeric_limits::lowest()`. +// Returns Output rank 2 Tensor containing logits for each example. +func BoostedTreesPredict(scope *Scope, tree_ensemble_handle tf.Output, bucketized_features []tf.Output, logits_dimension int64) (logits tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"logits_dimension": logits_dimension} + opspec := tf.OpSpec{ + Type: "BoostedTreesPredict", + Input: []tf.Input{ + tree_ensemble_handle, tf.OutputList(bucketized_features), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Pads a tensor with zeros. // -// If the given segment ID `i` is negative, then the corresponding value is -// dropped, and will not be included in the result. +// This operation pads a `input` with zeros according to the `paddings` you +// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the +// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates +// how many zeros to add before the contents of `input` in that dimension, and +// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` +// in that dimension. // -//
-// -//
+// The padded size of each dimension D of the output is: +// +// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` // // For example: // -// ``` python -// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) -// tf.unsorted_segment_max(c, tf.constant([0, 1, 0]), num_segments=2) -// # ==> [[ 4, 3, 3, 4], -// # [5, 6, 7, 8]] +// ``` +// # 't' is [[1, 1], [2, 2]] +// # 'paddings' is [[1, 1], [2, 2]] +// # rank of 't' is 2 +// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] +// [0, 0, 1, 1, 0, 0] +// [0, 0, 2, 2, 0, 0] +// [0, 0, 0, 0, 0, 0]] // ``` // -// -// Arguments: -// -// segment_ids: A tensor whose shape is a prefix of `data.shape`. -// -// -// Returns Has same shape as data, except for the first `segment_ids.rank` -// dimensions, which are replaced with a single dimension which has size -// `num_segments`. -func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { +func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "UnsortedSegmentMax", + Type: "Pad", Input: []tf.Input{ - data, segment_ids, num_segments, + input, paddings, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Transforms a vector of brain.Example protos (as strings) into typed tensors. +// Checks whether a resource handle-based variable has been initialized. // // Arguments: -// serialized: A vector containing a batch of binary serialized Example protos. -// names: A vector containing the names of the serialized protos. -// May contain, for example, table key (descriptive) names for the -// corresponding serialized protos. These are purely useful for debugging -// purposes, and the presence of values here has no effect on the output. -// May also be an empty vector if no names are available. -// If non-empty, this vector must be the same length as "serialized". -// sparse_keys: A list of Nsparse string Tensors (scalars). 
-// The keys expected in the Examples' features associated with sparse values. -// dense_keys: A list of Ndense string Tensors (scalars). -// The keys expected in the Examples' features associated with dense values. -// dense_defaults: A list of Ndense Tensors (some may be empty). -// dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. -// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, -// then the shape of dense_defaults[j] must match that of dense_shapes[j]. -// If dense_shapes[j] has an undefined major dimension (variable strides dense -// feature), dense_defaults[j] must contain a single element: -// the padding element. -// sparse_types: A list of Nsparse types; the data types of data in each Feature -// given in sparse_keys. -// Currently the ParseExample supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// dense_shapes: A list of Ndense shapes; the shapes of data in each Feature -// given in dense_keys. -// The number of elements in the Feature corresponding to dense_key[j] -// must always equal dense_shapes[j].NumEntries(). -// If dense_shapes[j] == (D0, D1, ..., DN) then the shape of output -// Tensor dense_values[j] will be (|serialized|, D0, D1, ..., DN): -// The dense outputs are just the inputs row-stacked by batch. -// This works for dense_shapes[j] = (-1, D1, ..., DN). In this case -// the shape of the output Tensor dense_values[j] will be -// (|serialized|, M, D1, .., DN), where M is the maximum number of blocks -// of elements of length D1 * .... * DN, across all minibatch entries -// in the input. Any minibatch entry with less than M blocks of elements of -// length D1 * ... 
* DN will be padded with the corresponding default_value -// scalar element along the second dimension. -func ParseExample(scope *Scope, serialized tf.Output, names tf.Output, sparse_keys []tf.Output, dense_keys []tf.Output, dense_defaults []tf.Output, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { +// resource: the input resource handle. +// +// Returns a scalar boolean which is true if the variable has been +// initialized. +func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"sparse_types": sparse_types, "dense_shapes": dense_shapes} opspec := tf.OpSpec{ - Type: "ParseExample", + Type: "VarIsInitializedOp", Input: []tf.Input{ - serialized, names, tf.OutputList(sparse_keys), tf.OutputList(dense_keys), tf.OutputList(dense_defaults), + resource, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { - scope.UpdateErr("ParseExample", err) - return - } - return sparse_indices, sparse_values, sparse_shapes, dense_values + return op.Output(0) } -// Compute the pairwise cross product. -// -// `a` and `b` must be the same shape; they can either be simple 3-element vectors, -// or any shape where the innermost dimension is 3. 
In the latter case, each pair -// of corresponding 3-element vectors is cross-multiplied independently. -// -// Arguments: -// a: A tensor containing 3-element vectors. -// b: Another tensor, of same type and shape as `a`. +// Returns the min of x and y (i.e. x < y ? x : y) element-wise. // -// Returns Pairwise cross product of the vectors in `a` and `b`. -func Cross(scope *Scope, a tf.Output, b tf.Output) (product tf.Output) { +// *NOTE*: `Minimum` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Cross", + Type: "Minimum", Input: []tf.Input{ - a, b, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// StatefulStandardNormalAttr is an optional argument to StatefulStandardNormal. -type StatefulStandardNormalAttr func(optionalAttr) - -// StatefulStandardNormalDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatefulStandardNormalDtype(value tf.DataType) StatefulStandardNormalAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs random values from a normal distribution. +// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` // -// The generated values will have mean 0 and standard deviation 1. +// if < 0, `scale * features` otherwise. // -// Arguments: -// resource: The handle of the resource variable that stores the state of the RNG. -// shape: The shape of the output tensor. +// To be used together with +// `initializer = tf.variance_scaling_initializer(factor=1.0, mode='FAN_IN')`. +// For correct dropout, use `tf.contrib.nn.alpha_dropout`. // -// Returns A tensor of the specified shape filled with random normal values. 
-func StatefulStandardNormal(scope *Scope, resource tf.Output, shape tf.Output, optional ...StatefulStandardNormalAttr) (output tf.Output) { +// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) +func Selu(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "StatefulStandardNormal", + Type: "Selu", Input: []tf.Input{ - resource, shape, + features, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// StatefulStandardNormalV2Attr is an optional argument to StatefulStandardNormalV2. -type StatefulStandardNormalV2Attr func(optionalAttr) +// SetSizeAttr is an optional argument to SetSize. +type SetSizeAttr func(optionalAttr) -// StatefulStandardNormalV2Dtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatefulStandardNormalV2Dtype(value tf.DataType) StatefulStandardNormalV2Attr { +// SetSizeValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func SetSizeValidateIndices(value bool) SetSizeAttr { return func(m optionalAttr) { - m["dtype"] = value + m["validate_indices"] = value } } -// Outputs random values from a normal distribution. +// Number of unique elements along last dimension of input `set`. // -// The generated values will have mean 0 and standard deviation 1. +// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`, +// and `set_shape`. The last dimension contains values in a set, duplicates are +// allowed but ignored. +// +// If `validate_indices` is `True`, this op validates the order and range of `set` +// indices. // // Arguments: -// resource: The handle of the resource variable that stores the state of the RNG. -// algorithm: The RNG algorithm. -// shape: The shape of the output tensor. 
+// set_indices: 2D `Tensor`, indices of a `SparseTensor`. +// set_values: 1D `Tensor`, values of a `SparseTensor`. +// set_shape: 1D `Tensor`, shape of a `SparseTensor`. // -// Returns A tensor of the specified shape filled with random normal values. -func StatefulStandardNormalV2(scope *Scope, resource tf.Output, algorithm tf.Output, shape tf.Output, optional ...StatefulStandardNormalV2Attr) (output tf.Output) { +// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st +// `n-1` dimensions as `set`. Each value is the number of unique elements in +// the corresponding `[0...n-1]` dimension of `set`. +func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) { if scope.Err() != nil { return } @@ -12959,9 +11980,9 @@ func StatefulStandardNormalV2(scope *Scope, resource tf.Output, algorithm tf.Out a(attrs) } opspec := tf.OpSpec{ - Type: "StatefulStandardNormalV2", + Type: "SetSize", Input: []tf.Input{ - resource, algorithm, shape, + set_indices, set_values, set_shape, }, Attrs: attrs, } @@ -12969,471 +11990,419 @@ func StatefulStandardNormalV2(scope *Scope, resource tf.Output, algorithm tf.Out return op.Output(0) } -// Locks a mutex resource. The output is the lock. So long as the lock tensor +// Adds sparse `updates` to an existing tensor according to `indices`. // -// is alive, any other request to use `MutexLock` with this mutex will wait. +// This operation creates a new tensor by adding sparse `updates` to the passed +// in `tensor`. +// This operation is very similar to `tf.scatter_nd_add`, except that the updates +// are added onto an existing tensor (as opposed to a variable). If the memory +// for the existing tensor cannot be re-used, a copy is made and updated. 
// -// This is particularly useful for creating a critical section when used in -// conjunction with `MutexLockIdentity`: +// `indices` is an integer tensor containing indices into a new tensor of shape +// `shape`. The last dimension of `indices` can be at most the rank of `shape`: // -// ```python +// indices.shape[-1] <= shape.rank // -// mutex = mutex_v2( -// shared_name=handle_name, container=container, name=name) +// The last dimension of `indices` corresponds to indices into elements +// (if `indices.shape[-1] = shape.rank`) or slices +// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of +// `shape`. `updates` is a tensor with shape // -// def execute_in_critical_section(fn, *args, **kwargs): -// lock = gen_resource_variable_ops.mutex_lock(mutex) +// indices.shape[:-1] + shape[indices.shape[-1]:] // -// with ops.control_dependencies([lock]): -// r = fn(*args, **kwargs) +// The simplest form of tensor_scatter_add is to add individual elements to a +// tensor by index. For example, say we want to add 4 elements in a rank-1 +// tensor with 8 elements. // -// with ops.control_dependencies(nest.flatten(r)): -// with ops.colocate_with(mutex): -// ensure_lock_exists = mutex_lock_identity(lock) +// In Python, this scatter add operation would look like this: // -// # Make sure that if any element of r is accessed, all of -// # them are executed together. -// r = nest.map_structure(tf.identity, r) +// ```python +// indices = tf.constant([[4], [3], [1], [7]]) +// updates = tf.constant([9, 10, 11, 12]) +// tensor = tf.ones([8], dtype=tf.int32) +// updated = tf.tensor_scatter_add(tensor, indices, updates) +// with tf.Session() as sess: +// print(sess.run(scatter)) +// ``` // -// with ops.control_dependencies([ensure_lock_exists]): -// return nest.map_structure(tf.identity, r) +// The resulting tensor would look like this: +// +// [1, 12, 1, 11, 10, 1, 1, 13] +// +// We can also, insert entire slices of a higher rank tensor all at once. 
For +// example, if we wanted to insert two slices in the first dimension of a +// rank-3 tensor with two matrices of new values. +// +// In Python, this scatter add operation would look like this: +// +// ```python +// indices = tf.constant([[0], [2]]) +// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], +// [7, 7, 7, 7], [8, 8, 8, 8]], +// [[5, 5, 5, 5], [6, 6, 6, 6], +// [7, 7, 7, 7], [8, 8, 8, 8]]]) +// tensor = tf.ones([4, 4, 4]) +// updated = tf.tensor_scatter_add(tensor, indices, updates) +// with tf.Session() as sess: +// print(sess.run(scatter)) // ``` // -// While `fn` is running in the critical section, no other functions which wish to -// use this critical section may run. +// The resulting tensor would look like this: // -// Often the use case is that two executions of the same graph, in parallel, -// wish to run `fn`; and we wish to ensure that only one of them executes -// at a time. This is especially important if `fn` modifies one or more -// variables at a time. +// [[[6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9]], +// [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]], +// [[6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9]], +// [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]] // -// It is also useful if two separate functions must share a resource, but we -// wish to ensure the usage is exclusive. +// Note that on CPU, if an out of bound index is found, an error is returned. +// On GPU, if an out of bound index is found, the index is ignored. // // Arguments: -// mutex: The mutex resource to lock. +// tensor: Tensor to copy/update. +// indices: Index tensor. +// updates: Updates to scatter into output. // -// Returns A tensor that keeps a shared pointer to a lock on the mutex; -// when the Tensor is destroyed, the use count on the shared pointer is decreased -// by 1. When it reaches 0, the lock is released. 
-func MutexLock(scope *Scope, mutex tf.Output) (mutex_lock tf.Output) { +// Returns A new tensor copied from tensor and updates added according to the indices. +func TensorScatterAdd(scope *Scope, tensor tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "MutexLock", + Type: "TensorScatterAdd", Input: []tf.Input{ - mutex, + tensor, indices, updates, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Transforms a serialized tensorflow.TensorProto proto into a Tensor. +// Computes the sign and the log of the absolute value of the determinant of +// +// one or more square matrices. +// +// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions +// form square matrices. The outputs are two tensors containing the signs and +// absolute values of the log determinants for all N input submatrices +// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). +// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU +// is the LU decomposition of the input and P is the corresponding +// permutation matrix. // // Arguments: -// serialized: A scalar string containing a serialized TensorProto proto. -// out_type: The type of the serialized tensor. The provided type must match the -// type of the serialized tensor and no implicit conversion will take place. +// input: Shape is `[N, M, M]`. // -// Returns A Tensor of type `out_type`. -func ParseTensor(scope *Scope, serialized tf.Output, out_type tf.DataType) (output tf.Output) { +// Returns The signs of the log determinants of the inputs. Shape is `[N]`.The logs of the absolute values of the determinants +// of the N input matrices. Shape is `[N]`. 
+func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} opspec := tf.OpSpec{ - Type: "ParseTensor", + Type: "LogMatrixDeterminant", Input: []tf.Input{ - serialized, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MaxPoolWithArgmaxAttr is an optional argument to MaxPoolWithArgmax. -type MaxPoolWithArgmaxAttr func(optionalAttr) - -// MaxPoolWithArgmaxTargmax sets the optional Targmax attribute to value. -// If not specified, defaults to DT_INT64 -func MaxPoolWithArgmaxTargmax(value tf.DataType) MaxPoolWithArgmaxAttr { - return func(m optionalAttr) { - m["Targmax"] = value - } + return op.Output(0), op.Output(1) } -// Performs max pooling on the input and outputs both max values and indices. +// Says whether the targets are in the top `K` predictions. // -// The indices in `argmax` are flattened, so that a maximum value at position -// `[b, y, x, c]` becomes flattened index -// `((b * height + y) * width + x) * channels + c`. +// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +// prediction for the target class is among the top `k` predictions among +// all predictions for example `i`. Note that the behavior of `InTopK` differs +// from the `TopK` op in its handling of ties; if multiple classes have the +// same prediction value and straddle the top-`k` boundary, all of those +// classes are considered to be in the top `k`. // -// The indices returned are always in `[0, height) x [0, width)` before flattening, -// even if padding is involved and the mathematically correct answer is outside -// (either negative or too large). This is a bug, but fixing it is difficult to do -// in a safe backwards compatible way, especially due to flattening. 
+// More formally, let +// +// \\(predictions_i\\) be the predictions for all classes for example `i`, +// \\(targets_i\\) be the target class for example `i`, +// \\(out_i\\) be the output for example `i`, +// +// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ // // Arguments: -// input: 4-D with shape `[batch, height, width, channels]`. Input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// predictions: A `batch_size` x `classes` tensor. +// targets: A `batch_size` vector of class ids. +// k: Number of top elements to look at for computing precision. // -// Returns The max pooled output tensor.4-D. The flattened indices of the max values chosen for each output. -func MaxPoolWithArgmax(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolWithArgmaxAttr) (output tf.Output, argmax tf.Output) { +// Returns Computed precision at `k` as a `bool Tensor`. +func InTopKV2(scope *Scope, predictions tf.Output, targets tf.Output, k tf.Output) (precision tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "MaxPoolWithArgmax", + Type: "InTopKV2", Input: []tf.Input{ - input, + predictions, targets, k, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Creates a TensorList which, when stacked, has the value of `tensor`. +// Check if the input matches the regex pattern. // -// Each tensor in the result list corresponds to one row of the input tensor. +// The input is a string tensor of any shape. The pattern is a scalar +// string tensor which is applied to every element of the input tensor. 
+// The boolean values (True or False) of the output tensor indicate +// if the input matches the regex pattern provided. // -// tensor: The input tensor. -// output_handle: The list. -func TensorListFromTensor(scope *Scope, tensor tf.Output, element_shape tf.Output) (output_handle tf.Output) { +// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) +// +// Arguments: +// input: A string tensor of the text to be processed. +// pattern: A scalar string tensor containing the regular expression to match the input. +// +// Returns A bool tensor with the same shape as `input`. +func RegexFullMatch(scope *Scope, input tf.Output, pattern tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorListFromTensor", + Type: "RegexFullMatch", Input: []tf.Input{ - tensor, element_shape, + input, pattern, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Assigns sparse updates to the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] = updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] = updates[i, ...] +// Converts a `RaggedTensor` into a `SparseTensor` with the same values. // -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] = updates[i, ..., j, ...] +// input=ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits) +// output=SparseTensor(indices=sparse_indices, values=sparse_values, +// dense_shape=sparse_dense_shape) // // Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. +// rt_nested_splits: The `row_splits` for the `RaggedTensor`. +// rt_dense_values: The `flat_values` for the `RaggedTensor`. // -// Returns the created operation. 
-func ResourceScatterUpdate(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { +// Returns The indices for the `SparseTensor`.The values of the `SparseTensor`.`sparse_dense_shape` is a tight bounding box of the input `RaggedTensor`. +func RaggedTensorToSparse(scope *Scope, rt_nested_splits []tf.Output, rt_dense_values tf.Output) (sparse_indices tf.Output, sparse_values tf.Output, sparse_dense_shape tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ResourceScatterUpdate", + Type: "RaggedTensorToSparse", Input: []tf.Input{ - resource, indices, updates, + tf.OutputList(rt_nested_splits), rt_dense_values, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// MaxPoolAttr is an optional argument to MaxPool. -type MaxPoolAttr func(optionalAttr) +// FusedBatchNormGradV2Attr is an optional argument to FusedBatchNormGradV2. +type FusedBatchNormGradV2Attr func(optionalAttr) -// MaxPoolDataFormat sets the optional data_format attribute to value. +// FusedBatchNormGradV2Epsilon sets the optional epsilon attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. +// value: A small float number added to the variance of x. +// If not specified, defaults to 0.0001 +func FusedBatchNormGradV2Epsilon(value float32) FusedBatchNormGradV2Attr { + return func(m optionalAttr) { + m["epsilon"] = value + } +} + +// FusedBatchNormGradV2DataFormat sets the optional data_format attribute to value. +// +// value: The data format for y_backprop, x, x_backprop. +// Either "NHWC" (default) or "NCHW". 
// If not specified, defaults to "NHWC" -func MaxPoolDataFormat(value string) MaxPoolAttr { +func FusedBatchNormGradV2DataFormat(value string) FusedBatchNormGradV2Attr { return func(m optionalAttr) { m["data_format"] = value } } -// Performs max pooling on the input. +// FusedBatchNormGradV2IsTraining sets the optional is_training attribute to value. +// +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormGradV2IsTraining(value bool) FusedBatchNormGradV2Attr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// Gradient for batch normalization. +// +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. // // Arguments: -// input: 4-D input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// y_backprop: A 4D Tensor for the gradient with respect to y. +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch +// mean to be reused in gradient computation. When is_training is +// False, a 1D Tensor for the population mean to be reused in both +// 1st and 2nd order gradient computation. +// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch +// variance (inverted variance in the cuDNN case) to be reused in +// gradient computation. When is_training is False, a 1D Tensor +// for the population variance to be reused in both 1st and 2nd +// order gradient computation. // -// Returns The max pooled output tensor. 
-func MaxPool(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolAttr) (output tf.Output) { +// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input +// in FusedBatchNorm. +func FusedBatchNormGradV2(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradV2Attr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPool", + Type: "FusedBatchNormGradV2", Input: []tf.Input{ - input, + y_backprop, x, scale, reserve_space_1, reserve_space_2, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// Multiplies sparse updates into the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] *= updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] *= updates[i, ...] -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] *= updates[i, ..., j, ...] -// -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions multiply. +// Component-wise multiplies a SparseTensor by a dense Tensor. // -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. 
+// The output locations corresponding to the implicitly zero elements in the sparse +// tensor will be zero (i.e., will not take up storage space), regardless of the +// contents of the dense tensor (even if it's +/-INF and that INF*0 == NaN). // -//
-// -//
+// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not +// the other direction. // // Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. +// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// dense: `R`-D. The dense Tensor operand. // -// Returns the created operation. -func ResourceScatterMul(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { +// Returns 1-D. The `N` values that are operated on. +func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ResourceScatterMul", + Type: "SparseDenseCwiseMul", Input: []tf.Input{ - resource, indices, updates, - }, - } - return scope.AddOperation(opspec) -} - -// Adds sparse updates to the variable referenced by `resource`. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] += updates[...] -// -// # Vector indices (for each i) -// ref[indices[i], ...] += updates[i, ...] -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] += updates[i, ..., j, ...] -// -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions add. -// -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. -// -//
-// -//
-// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. -// -// Returns the created operation. -func ResourceScatterAdd(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ResourceScatterAdd", - Input: []tf.Input{ - resource, indices, updates, + sp_indices, sp_values, sp_shape, dense, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. -type ResourceSparseApplyFtrlV2Attr func(optionalAttr) +// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad. +type MaxPool3DGradAttr func(optionalAttr) -// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// MaxPool3DGradDataFormat sets the optional data_format attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr { +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["data_format"] = value } } -// Update relevant entries in '*var' according to the Ftrl-proximal scheme. 
-// -// That is for rows we have grad for, we update var, accum and linear as follows: -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// Computes gradients of max pooling function. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 shrinkage regulariation. Must be a scalar. -// -// lr_power: Scaling factor. Must be a scalar. -// -// Returns the created operation. -func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) { +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. 
+func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyFtrlV2", + Type: "MaxPool3DGrad", Input: []tf.Input{ - var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power, + orig_input, orig_output, grad, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Calculates gains for each feature and returns the best possible split information for the feature. -// -// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature. -// -// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split. -// -// In this manner, the output is the best split per features and per node, so that it needs to be combined later to produce the best split for each node (among all possible features). -// -// The length of output lists are all of the same length, `num_features`. -// The output shapes are compatible in a way that the first dimension of all tensors of all lists are the same and equal to the number of possible split nodes for each feature. -// -// Arguments: -// node_id_range: A Rank 1 tensor (shape=[2]) to specify the range [first, last) of node ids to process within `stats_summary_list`. 
The nodes are iterated between the two nodes specified by the tensor, as like `for node_id in range(node_id_range[0], node_id_range[1])` (Note that the last index node_id_range[1] is exclusive). -// stats_summary_list: A list of Rank 3 tensor (#shape=[max_splits, bucket, 2]) for accumulated stats summary (gradient/hessian) per node per buckets for each feature. The first dimension of the tensor is the maximum number of splits, and thus not all elements of it will be used, but only the indexes specified by node_ids will be used. -// l1: l1 regularization factor on leaf weights, per instance based. -// l2: l2 regularization factor on leaf weights, per instance based. -// tree_complexity: adjustment to the gain, per leaf based. -// min_node_weight: mininum avg of hessians in a node before required for the node to be considered for splitting. -// max_splits: the number of nodes that can be split in the whole tree. Used as a dimension of output tensors. -// -// Returns An output list of Rank 1 tensors indicating possible split node ids for each feature. The length of the list is num_features, but each tensor has different size as each feature provides different possible nodes. See above for details like shapes and sizes.An output list of Rank 1 tensors indicating the best gains for each feature to split for certain nodes. See above for details like shapes and sizes.An output list of Rank 1 tensors indicating the bucket id to compare with (as a threshold) for split in each node. See above for details like shapes and sizes.A list of Rank 2 tensors indicating the contribution of the left nodes when branching from parent nodes (given by the tensor element in the output node_ids_list) to the left direction by the given threshold for each feature. This value will be used to make the left node value by adding to the parent node value. Second dimension size is 1 for 1-dimensional logits, but would be larger for multi-class problems. 
See above for details like shapes and sizes.A list of Rank 2 tensors, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node. -func BoostedTreesCalculateBestGainsPerFeature(scope *Scope, node_id_range tf.Output, stats_summary_list []tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, max_splits int64) (node_ids_list []tf.Output, gains_list []tf.Output, thresholds_list []tf.Output, left_node_contribs_list []tf.Output, right_node_contribs_list []tf.Output) { +// Returns the name of the device on which `resource` has been placed. +func ExperimentalIteratorGetDevice(scope *Scope, resource tf.Output) (device tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"max_splits": max_splits} opspec := tf.OpSpec{ - Type: "BoostedTreesCalculateBestGainsPerFeature", + Type: "ExperimentalIteratorGetDevice", Input: []tf.Input{ - node_id_range, tf.OutputList(stats_summary_list), l1, l2, tree_complexity, min_node_weight, + resource, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if node_ids_list, idx, err = makeOutputList(op, idx, "node_ids_list"); err != nil { - scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) - return - } - if gains_list, idx, err = makeOutputList(op, idx, "gains_list"); err != nil { - scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) - return - } - if thresholds_list, idx, err = makeOutputList(op, idx, "thresholds_list"); err != nil { - scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) - return - } - if left_node_contribs_list, idx, err = makeOutputList(op, idx, "left_node_contribs_list"); err != nil { - scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) - return - } - if right_node_contribs_list, idx, err = makeOutputList(op, idx, "right_node_contribs_list"); err != nil { - 
scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) - return - } - return node_ids_list, gains_list, thresholds_list, left_node_contribs_list, right_node_contribs_list + return op.Output(0) } -// EncodePngAttr is an optional argument to EncodePng. -type EncodePngAttr func(optionalAttr) +// SparseReduceSumAttr is an optional argument to SparseReduceSum. +type SparseReduceSumAttr func(optionalAttr) -// EncodePngCompression sets the optional compression attribute to value. +// SparseReduceSumKeepDims sets the optional keep_dims attribute to value. // -// value: Compression level. -// If not specified, defaults to -1 -func EncodePngCompression(value int64) EncodePngAttr { +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func SparseReduceSumKeepDims(value bool) SparseReduceSumAttr { return func(m optionalAttr) { - m["compression"] = value + m["keep_dims"] = value } } -// PNG-encode an image. +// Computes the sum of elements across dimensions of a SparseTensor. // -// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` -// where `channels` is: +// This Op takes a SparseTensor and is the sparse counterpart to +// `tf.reduce_sum()`. In particular, this Op also returns a dense `Tensor` +// instead of a sparse one. // -// * 1: for grayscale. -// * 2: for grayscale + alpha. -// * 3: for RGB. -// * 4: for RGBA. +// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +// with length 1. // -// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder -// default or a value from 0 to 9. 9 is the highest compression level, generating -// the smallest output, but is slower. +// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +// with a single element is returned. 
Additionally, the axes can be negative, +// which are interpreted according to the indexing rules in Python. // // Arguments: -// image: 3-D with shape `[height, width, channels]`. +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. Shape of the input SparseTensor. +// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. // -// Returns 0-D. PNG-encoded image. -func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) { +// Returns `R-K`-D. The reduced Tensor. +func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -13442,9 +12411,9 @@ func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (conten a(attrs) } opspec := tf.OpSpec{ - Type: "EncodePng", + Type: "SparseReduceSum", Input: []tf.Input{ - image, + input_indices, input_values, input_shape, reduction_axes, }, Attrs: attrs, } @@ -13452,795 +12421,916 @@ func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (conten return op.Output(0) } -// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute. -type DataFormatVecPermuteAttr func(optionalAttr) - -// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value. -// -// value: source data format. -// If not specified, defaults to "NHWC" -func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr { - return func(m optionalAttr) { - m["src_format"] = value +// Records the latency of producing `input_dataset` elements in a StatsAggregator. 
+func ExperimentalLatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return } -} - -// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value. -// -// value: destination data format. -// If not specified, defaults to "NCHW" -func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr { - return func(m optionalAttr) { - m["dst_format"] = value + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "ExperimentalLatencyStatsDataset", + Input: []tf.Input{ + input_dataset, tag, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Returns the permuted vector/tensor in the destination data format given the +// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`. // -// one in the source data format. +// This Op does not require `a_indices` be sorted in standard lexicographic order. // // Arguments: -// x: Vector of size 4 or Tensor of shape (4, 2) in source data format. -// -// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format. -func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) { +// a_indices: 2-D. The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`. +// a_values: 1-D. The `values` of the `SparseTensor`, with shape `[nnz]`. +// a_shape: 1-D. The `shape` of the `SparseTensor`, with shape `[ndims]`. +// b: `ndims`-D Tensor. With shape `a_shape`. 
+func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "DataFormatVecPermute", + Type: "SparseTensorDenseAdd", Input: []tf.Input{ - x, + a_indices, a_values, a_shape, b, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Initializes the multi device iterator with the given dataset. +// QuantizedReluAttr is an optional argument to QuantizedRelu. +type QuantizedReluAttr func(optionalAttr) + +// QuantizedReluOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QUINT8 +func QuantizedReluOutType(value tf.DataType) QuantizedReluAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Computes Quantized Rectified Linear: `max(features, 0)` // // Arguments: -// dataset: Dataset to be iterated upon. -// multi_device_iterator: A MultiDeviceIteratorResource. -// max_buffer_size: The maximum size of the host side per device buffer to keep. // -// Returns An int64 indicating which incarnation of the MultiDeviceIterator -// is running. -func MultiDeviceIteratorInit(scope *Scope, dataset tf.Output, multi_device_iterator tf.Output, max_buffer_size tf.Output) (incarnation_id tf.Output) { +// min_features: The float value that the lowest quantized value represents. +// max_features: The float value that the highest quantized value represents. +// +// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. 
+func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "MultiDeviceIteratorInit", + Type: "QuantizedRelu", Input: []tf.Input{ - dataset, multi_device_iterator, max_buffer_size, + features, min_features, max_features, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Reads the value of a variable. +// Reorders a SparseTensor into the canonical, row-major ordering. // -// The tensor returned by this operation is immutable. +// Note that by convention, all sparse ops preserve the canonical ordering along +// increasing dimension number. The only time ordering can be violated is during +// manual manipulation of the indices and values vectors to add entries. // -// The value returned by this operation is guaranteed to be influenced by all the -// writes on which this operation depends directly or indirectly, and to not be -// influenced by any of the writes which depend directly or indirectly on this -// operation. +// Reordering does not affect the shape of the SparseTensor. +// +// If the tensor has rank `R` and `N` non-empty values, `input_indices` has +// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`. // // Arguments: -// resource: handle to the resource in which to store the variable. -// dtype: the dtype of the value. -func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value tf.Output) { +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. 
Shape of the input SparseTensor. +// +// Returns 2-D. `N x R` matrix with the same indices as input_indices, but +// in canonical row-major ordering.1-D. `N` non-empty values corresponding to `output_indices`. +func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "ReadVariableOp", + Type: "SparseReorder", Input: []tf.Input{ - resource, + input_indices, input_values, input_shape, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// This op consumes a lock created by `MutexLock`. +// Split a `SparseTensor` into `num_split` tensors along one dimension. // -// This op exists to consume a tensor created by `MutexLock` (other than -// direct control dependencies). It should be the only that consumes the tensor, -// and will raise an error if it is not. Its only purpose is to keep the -// mutex lock tensor alive until it is consumed by this op. +// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices +// `[0 : shape[split_dim] % num_split]` gets one extra dimension. +// For example, if `split_dim = 1` and `num_split = 2` and the input is // -// **NOTE**: This operation must run on the same device as its input. This may -// be enforced via the `colocate_with` mechanism. +// input_tensor = shape = [2, 7] +// [ a d e ] +// [b c ] +// +// Graphically the output tensors are: +// +// output_tensor[0] = shape = [2, 4] +// [ a ] +// [b c ] +// +// output_tensor[1] = shape = [2, 3] +// [ d e ] +// [ ] // // Arguments: -// mutex_lock: A tensor returned by `MutexLock`. +// split_dim: 0-D. The dimension along which to split. Must be in the range +// `[0, rank(shape))`. +// indices: 2-D tensor represents the indices of the sparse tensor. 
+// values: 1-D tensor represents the values of the sparse tensor. +// shape: 1-D. tensor represents the shape of the sparse tensor. +// output indices: A list of 1-D tensors represents the indices of the output +// sparse tensors. +// num_split: The number of ways to split. // -// Returns the created operation. -func ConsumeMutexLock(scope *Scope, mutex_lock tf.Output) (o *tf.Operation) { +// Returns A list of 1-D tensors represents the values of the output sparse +// tensors.A list of 1-D tensors represents the shape of the output sparse +// tensors. +func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_split": num_split} opspec := tf.OpSpec{ - Type: "ConsumeMutexLock", + Type: "SparseSplit", Input: []tf.Input{ - mutex_lock, + split_dim, indices, values, shape, }, + Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// ResourceScatterNdAddAttr is an optional argument to ResourceScatterNdAdd. -type ResourceScatterNdAddAttr func(optionalAttr) - -// ResourceScatterNdAddUseLocking sets the optional use_locking attribute to value. -// -// value: An optional bool. Defaults to True. If True, the assignment will -// be protected by a lock; otherwise the behavior is undefined, -// but may exhibit less contention. 
-// If not specified, defaults to true -func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr { - return func(m optionalAttr) { - m["use_locking"] = value + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { + scope.UpdateErr("SparseSplit", err) + return + } + if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { + scope.UpdateErr("SparseSplit", err) + return } + return output_indices, output_values, output_shape } -// Applies sparse addition to individual values or slices in a Variable. +// Applies sparse addition to `input` using individual values or slices // -// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. +// from `updates` according to indices `indices`. The updates are non-aliasing: +// `input` is only modified in-place if no other operations will use it. +// Otherwise, a copy of `input` is made. This operation has a gradient with +// respect to both `input` and `updates`. // -// `indices` must be integer tensor, containing indices into `ref`. -// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. +// `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. +// +// `indices` must be integer tensor, containing indices into `input`. +// It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. // // The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th -// dimension of `ref`. +// indices into elements (if `K = P`) or `(P-K)`-dimensional slices +// (if `K < P`) along the `K`th dimension of `input`. 
// // `updates` is `Tensor` of rank `Q-1+P-K` with shape: // -// ``` -// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]] -// ``` +// $$[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].$$ // -// For example, say we want to add 4 scattered elements to a rank-1 tensor to -// 8 elements. In Python, that addition would look like this: +// For example, say we want to add 4 scattered elements to a rank-1 tensor to 8 +// elements. In Python, that addition would look like this: // -// ```python -// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True) -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// add = tf.scatter_nd_add(ref, indices, updates) -// with tf.Session() as sess: -// print sess.run(add) -// ``` +// input = tf.constant([1, 2, 3, 4, 5, 6, 7, 8]) +// indices = tf.constant([[4], [3], [1], [7]]) +// updates = tf.constant([9, 10, 11, 12]) +// output = tf.scatter_nd_non_aliasing_add(input, indices, updates) +// with tf.Session() as sess: +// print(sess.run(output)) // -// The resulting update to ref would look like this: +// The resulting value `output` would look like this: // // [1, 13, 3, 14, 14, 6, 7, 20] // -// See `tf.scatter_nd` for more details about how to make updates to -// slices. +// See `tf.scatter_nd` for more details about how to make updates to slices. // // Arguments: -// ref: A resource handle. Must be from a VarHandleOp. -// indices: A Tensor. Must be one of the following types: int32, int64. -// A tensor of indices into ref. -// updates: A Tensor. Must have the same type as ref. A tensor of -// values to add to ref. +// input: A Tensor. +// indices: A Tensor. Must be one of the following types: `int32`, `int64`. +// A tensor of indices into `input`. +// updates: A Tensor. Must have the same type as ref. A tensor of updated values +// to add to `input`. // -// Returns the created operation. 
-func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdAddAttr) (o *tf.Operation) { +// Returns A `Tensor` with the same shape as `input`, containing values of `input` +// updated with `updates`. +func ScatterNdNonAliasingAdd(scope *Scope, input tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResourceScatterNdAdd", + Type: "ScatterNdNonAliasingAdd", Input: []tf.Input{ - ref, indices, updates, + input, indices, updates, }, - Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Updates the tree ensemble by either adding a layer to the last tree being grown -// -// or by starting a new tree. +// Creates a MultiDeviceIterator resource. // // Arguments: -// tree_ensemble_handle: Handle to the ensemble variable. -// feature_ids: Rank 1 tensor with ids for each feature. This is the real id of -// the feature that will be used in the split. -// node_ids: List of rank 1 tensors representing the nodes for which this feature -// has a split. -// gains: List of rank 1 tensors representing the gains for each of the feature's -// split. -// thresholds: List of rank 1 tensors representing the thesholds for each of the -// feature's split. -// left_node_contribs: List of rank 2 tensors with left leaf contribs for each of -// the feature's splits. Will be added to the previous node values to constitute -// the values of the left nodes. -// right_node_contribs: List of rank 2 tensors with right leaf contribs for each -// of the feature's splits. Will be added to the previous node values to constitute -// the values of the right nodes. -// max_depth: Max depth of the tree to build. -// learning_rate: shrinkage const for each new tree. 
-// pruning_mode: 0-No pruning, 1-Pre-pruning, 2-Post-pruning. -// -// Returns the created operation. -func BoostedTreesUpdateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, feature_ids tf.Output, node_ids []tf.Output, gains []tf.Output, thresholds []tf.Output, left_node_contribs []tf.Output, right_node_contribs []tf.Output, max_depth tf.Output, learning_rate tf.Output, pruning_mode int64) (o *tf.Operation) { - if scope.Err() != nil { - return +// devices: A list of devices the iterator works across. +// shared_name: If non-empty, this resource will be shared under the given name +// across multiple sessions. +// container: If non-empty, this resource is placed in the given container. +// Otherwise, a default container is used. +// output_types: The type list for the return values. +// output_shapes: The list of shapes being produced. +// +// Returns Handle to the resource created. +func MultiDeviceIterator(scope *Scope, devices []string, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return } - attrs := map[string]interface{}{"pruning_mode": pruning_mode} + attrs := map[string]interface{}{"devices": devices, "shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "BoostedTreesUpdateEnsemble", - Input: []tf.Input{ - tree_ensemble_handle, feature_ids, tf.OutputList(node_ids), tf.OutputList(gains), tf.OutputList(thresholds), tf.OutputList(left_node_contribs), tf.OutputList(right_node_contribs), max_depth, learning_rate, - }, + Type: "MultiDeviceIterator", + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes tan of x element-wise. -func Tan(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return +// FractionalMaxPoolAttr is an optional argument to FractionalMaxPool. 
+type FractionalMaxPoolAttr func(optionalAttr) + +// FractionalMaxPoolPseudoRandom sets the optional pseudo_random attribute to value. +// +// value: When set to True, generates the pooling sequence in a +// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin +// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for +// difference between pseudorandom and random. +// If not specified, defaults to false +func FractionalMaxPoolPseudoRandom(value bool) FractionalMaxPoolAttr { + return func(m optionalAttr) { + m["pseudo_random"] = value } - opspec := tf.OpSpec{ - Type: "Tan", - Input: []tf.Input{ - x, - }, +} + +// FractionalMaxPoolOverlapping sets the optional overlapping attribute to value. +// +// value: When set to True, it means when pooling, the values at the boundary +// of adjacent pooling cells are used by both cells. For example: +// +// `index 0 1 2 3 4` +// +// `value 20 5 16 3 7` +// +// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. +// The result would be [20, 16] for fractional max pooling. +// If not specified, defaults to false +func FractionalMaxPoolOverlapping(value bool) FractionalMaxPoolAttr { + return func(m optionalAttr) { + m["overlapping"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Deprecated. Use TensorArraySplitV3 +// FractionalMaxPoolDeterministic sets the optional deterministic attribute to value. // -// DEPRECATED at GraphDef version 26: Use TensorArraySplitV3 -func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return +// value: When set to True, a fixed pooling region will be used when +// iterating over a FractionalMaxPool node in the computation graph. Mainly used +// in unit test to make FractionalMaxPool deterministic. 
+// If not specified, defaults to false +func FractionalMaxPoolDeterministic(value bool) FractionalMaxPoolAttr { + return func(m optionalAttr) { + m["deterministic"] = value } - opspec := tf.OpSpec{ - Type: "TensorArraySplitV2", - Input: []tf.Input{ - handle, value, lengths, flow_in, - }, +} + +// FractionalMaxPoolSeed sets the optional seed attribute to value. +// +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func FractionalMaxPoolSeed(value int64) FractionalMaxPoolAttr { + return func(m optionalAttr) { + m["seed"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Reshapes a SparseTensor to represent values in a new dense shape. +// FractionalMaxPoolSeed2 sets the optional seed2 attribute to value. // -// This operation has the same semantics as reshape on the represented dense -// tensor. The `input_indices` are recomputed based on the requested `new_shape`. +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func FractionalMaxPoolSeed2(value int64) FractionalMaxPoolAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Performs fractional max pooling on the input. // -// If one component of `new_shape` is the special value -1, the size of that -// dimension is computed so that the total dense size remains constant. At -// most one component of `new_shape` can be -1. The number of dense elements -// implied by `new_shape` must be the same as the number of dense elements -// originally implied by `input_shape`. +// Fractional max pooling is slightly different than regular max pooling. In +// regular max pooling, you downsize an input set by taking the maximum value of +// smaller N x N subsections of the set (often 2x2), and try to reduce the set by +// a factor of N, where N is an integer. 
Fractional max pooling, as you might +// expect from the word "fractional", means that the overall reduction ratio N +// does not have to be an integer. // -// Reshaping does not affect the order of values in the SparseTensor. +// The sizes of the pooling regions are generated randomly but are fairly uniform. +// For example, let's look at the height dimension, and the constraints on the +// list of rows that will be pool boundaries. // -// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape` -// has length `R_out`, then `input_indices` has shape `[N, R_in]`, -// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and -// `output_shape` has length `R_out`. +// First we define the following: +// +// 1. input_row_length : the number of rows from the input set +// 2. output_row_length : which will be smaller than the input +// 3. alpha = input_row_length / output_row_length : our reduction ratio +// 4. K = floor(alpha) +// 5. row_pooling_sequence : this is the result list of pool boundary rows +// +// Then, row_pooling_sequence should satisfy: +// +// 1. a[0] = 0 : the first value of the sequence is 0 +// 2. a[end] = input_row_length : the last value of the sequence is the size +// 3. K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size +// 4. length(row_pooling_sequence) = output_row_length+1 +// +// For more details on fractional max pooling, see this paper: +// [Benjamin Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) // // Arguments: -// input_indices: 2-D. `N x R_in` matrix with the indices of non-empty values in a -// SparseTensor. -// input_shape: 1-D. `R_in` vector with the input SparseTensor's dense shape. -// new_shape: 1-D. `R_out` vector with the requested new dense shape. +// value: 4-D with shape `[batch, height, width, channels]`. +// pooling_ratio: Pooling ratio for each dimension of `value`, currently only +// supports row and col dimension and should be >= 1.0. 
For example, a valid +// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements +// must be 1.0 because we don't allow pooling on batch and channels +// dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions +// respectively. // -// Returns 2-D. `N x R_out` matrix with the updated indices of non-empty -// values in the output SparseTensor.1-D. `R_out` vector with the full dense shape of the output -// SparseTensor. This is the same as `new_shape` but with any -1 dimensions -// filled in. -func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) { +// Returns output tensor after fractional max pooling.row pooling sequence, needed to calculate gradient.column pooling sequence, needed to calculate gradient. +func FractionalMaxPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalMaxPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"pooling_ratio": pooling_ratio} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseReshape", + Type: "FractionalMaxPool", Input: []tf.Input{ - input_indices, input_shape, new_shape, + value, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2) } -// Computes the product along segments of a tensor. +// Generates sparse cross from a list of sparse and dense tensors. // -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. +// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each +// representing features of one feature column. It outputs a 2D `SparseTensor` with +// the batchwise crosses of these features. 
// -// Computes a tensor such that -// \\(output_i = \prod_j data_j\\) where the product is over `j` such -// that `segment_ids[j] == i`. +// For example, if the inputs are // -// If the product is empty for a given segment ID `i`, `output[i] = 1`. +// inputs[0]: SparseTensor with shape = [2, 2] +// [0, 0]: "a" +// [1, 0]: "b" +// [1, 1]: "c" // -//
-// -//
+// inputs[1]: SparseTensor with shape = [2, 1] +// [0, 0]: "d" +// [1, 0]: "e" // -// For example: +// inputs[2]: Tensor [["f"], ["g"]] // -// ``` -// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) -// tf.segment_prod(c, tf.constant([0, 0, 1])) -// # ==> [[4, 6, 6, 4], -// # [5, 6, 7, 8]] -// ``` +// then the output will be +// +// shape = [2, 2] +// [0, 0]: "a_X_d_X_f" +// [1, 0]: "b_X_e_X_g" +// [1, 1]: "c_X_e_X_g" +// +// if hashed_output=true then the output will be // +// shape = [2, 2] +// [0, 0]: FingerprintCat64( +// Fingerprint64("f"), FingerprintCat64( +// Fingerprint64("d"), Fingerprint64("a"))) +// [1, 0]: FingerprintCat64( +// Fingerprint64("g"), FingerprintCat64( +// Fingerprint64("e"), Fingerprint64("b"))) +// [1, 1]: FingerprintCat64( +// Fingerprint64("g"), FingerprintCat64( +// Fingerprint64("e"), Fingerprint64("c"))) // // Arguments: +// indices: 2-D. Indices of each input `SparseTensor`. +// values: 1-D. values of each `SparseTensor`. +// shapes: 1-D. Shapes of each `SparseTensor`. +// dense_inputs: 2-D. Columns represented by dense `Tensor`. +// hashed_output: If true, returns the hash of the cross instead of the string. +// This will allow us avoiding string manipulations. +// num_buckets: It is used if hashed_output is true. +// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. +// hash_key: Specify the hash_key that will be used by the `FingerprintCat64` +// function to combine the crosses fingerprints. // -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. // -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// +// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated or hashed +// `SparseTensor`.1-D. 
Shape of the concatenated `SparseTensor`. +func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type} opspec := tf.OpSpec{ - Type: "SegmentProd", + Type: "SparseCross", Input: []tf.Input{ - data, segment_ids, + tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Bucketizes 'input' based on 'boundaries'. +// Inverse real-valued fast Fourier transform. // -// For example, if the inputs are -// boundaries = [0, 10, 100] -// input = [[-5, 10000] -// [150, 10] -// [5, 100]] +// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most dimension of `input`. // -// then the output will be -// output = [[0, 3] -// [3, 2] -// [1, 3]] +// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the +// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If +// `fft_length` is not provided, it is computed from the size of the inner-most +// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to +// compute `input` is odd, it should be provided since it cannot be inferred +// properly. +// +// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller +// than the corresponding dimension of `input`, the dimension is cropped. If it is +// larger, the dimension is padded with zeros. 
// // Arguments: -// input: Any shape of Tensor contains with int or float type. -// boundaries: A sorted list of floats gives the boundary of the buckets. +// input: A complex64 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. // -// Returns Same shape with 'input', each value of input replaced with bucket index. +// Returns A float32 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length` samples of its inverse +// 1D Fourier transform. // // @compatibility(numpy) -// Equivalent to np.digitize. +// Equivalent to np.fft.irfft // @end_compatibility -func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) { +func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"boundaries": boundaries} opspec := tf.OpSpec{ - Type: "Bucketize", + Type: "IRFFT", Input: []tf.Input{ - input, + input, fft_length, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. -type StatelessTruncatedNormalAttr func(optionalAttr) - -// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. +// Concatenates a list of `SparseTensor` along the specified dimension. // -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { - return func(m optionalAttr) { - m["dtype"] = value - } -} - -// Outputs deterministic pseudorandom values from a truncated normal distribution. +// Concatenation is with respect to the dense versions of these sparse tensors. +// It is assumed that each input is a `SparseTensor` whose elements are ordered +// along increasing dimension number. 
// -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. +// All inputs' shapes must match, except for the concat dimension. The +// `indices`, `values`, and `shapes` lists must have the same length. // -// The outputs are a deterministic function of `shape` and `seed`. +// The output shape is identical to the inputs', except along the concat +// dimension, where it is the sum of the inputs' sizes along that dimension. +// +// The output elements will be resorted to preserve the sort order along +// increasing dimension number. +// +// This op runs in `O(M log M)` time, where `M` is the total number of non-empty +// values across all inputs. This is due to the need for an internal sort in +// order to concatenate efficiently across an arbitrary dimension. +// +// For example, if `concat_dim = 1` and the inputs are +// +// sp_inputs[0]: shape = [2, 3] +// [0, 2]: "a" +// [1, 0]: "b" +// [1, 1]: "c" +// +// sp_inputs[1]: shape = [2, 4] +// [0, 1]: "d" +// [0, 2]: "e" +// +// then the output will be +// +// shape = [2, 7] +// [0, 2]: "a" +// [0, 4]: "d" +// [0, 5]: "e" +// [1, 0]: "b" +// [1, 1]: "c" +// +// Graphically this is equivalent to doing +// +// [ a] concat [ d e ] = [ a d e ] +// [b c ] [ ] [b c ] // // Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// indices: 2-D. Indices of each input `SparseTensor`. +// values: 1-D. Non-empty values of each `SparseTensor`. +// shapes: 1-D. Shapes of each `SparseTensor`. +// concat_dim: Dimension to concatenate along. Must be in range [-rank, rank), +// where rank is the number of dimensions in each input `SparseTensor`. // -// Returns Random values with specified shape. -func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { +// Returns 2-D. 
Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. +func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"concat_dim": concat_dim} opspec := tf.OpSpec{ - Type: "StatelessTruncatedNormal", + Type: "SparseConcat", Input: []tf.Input{ - shape, seed, + tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RestoreSliceAttr is an optional argument to RestoreSlice. -type RestoreSliceAttr func(optionalAttr) - -// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. -// -// value: Index of file to open first if multiple files match -// `file_pattern`. See the documentation for `Restore`. -// If not specified, defaults to -1 -func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { - return func(m optionalAttr) { - m["preferred_shard"] = value - } + return op.Output(0), op.Output(1), op.Output(2) } -// Restores a tensor from checkpoint files. -// -// This is like `Restore` except that restored tensor can be listed as filling -// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the -// larger tensor and the slice that the restored tensor covers. -// -// The `shape_and_slice` input has the same format as the -// elements of the `shapes_and_slices` input of the `SaveSlices` op. -// -// Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// shape_and_slice: Scalar. 
The shapes and slice specifications to use when -// restoring a tensors. -// dt: The type of the tensor to be restored. +// Elementwise computes the bitwise AND of `x` and `y`. // -// Returns The restored tensor. -func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { +// The result will have those bits set, that are set in both `x` and `y`. The +// computation is performed on the underlying representations of `x` and `y`. +func BitwiseAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dt": dt} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "RestoreSlice", + Type: "BitwiseAnd", Input: []tf.Input{ - file_pattern, tensor_name, shape_and_slice, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Divides sparse updates into the variable referenced by `resource`. +// Deserialize and concatenate `SparseTensors` from a serialized minibatch. // -// This operation computes +// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where +// `N` is the minibatch size and the rows correspond to packed outputs of +// `SerializeSparse`. The ranks of the original `SparseTensor` objects +// must all match. When the final `SparseTensor` is created, it has rank one +// higher than the ranks of the incoming `SparseTensor` objects +// (they have been concatenated along a new row dimension). // -// # Scalar indices -// ref[indices, ...] /= updates[...] +// The output `SparseTensor` object's shape values for all dimensions but the +// first are the max across the input `SparseTensor` objects' shape values +// for the corresponding dimensions. Its first shape value is `N`, the minibatch +// size. // -// # Vector indices (for each i) -// ref[indices[i], ...] /= updates[i, ...] 
+// The input `SparseTensor` objects' indices are assumed ordered in +// standard lexicographic order. If this is not the case, after this +// step run `SparseReorder` to restore index ordering. // -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...] +// For example, if the serialized input is a `[2 x 3]` matrix representing two +// original `SparseTensor` objects: // -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions multiply. +// index = [ 0] +// [10] +// [20] +// values = [1, 2, 3] +// shape = [50] // -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// and // -//
-// -//
+// index = [ 2] +// [10] +// values = [4, 5] +// shape = [30] // -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. +// then the final deserialized `SparseTensor` will be: // -// Returns the created operation. -func ResourceScatterDiv(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { +// index = [0 0] +// [0 10] +// [0 20] +// [1 2] +// [1 10] +// values = [1, 2, 3, 4, 5] +// shape = [2 50] +// +// Arguments: +// serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects. +// Must have 3 columns. +// dtype: The `dtype` of the serialized `SparseTensor` objects. +func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "ResourceScatterDiv", + Type: "DeserializeManySparse", Input: []tf.Input{ - resource, indices, updates, + serialized_sparse, }, + Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal. -type StatelessRandomNormalAttr func(optionalAttr) - -// StatelessRandomNormalDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr { - return func(m optionalAttr) { - m["dtype"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// Outputs deterministic pseudorandom values from a normal distribution. +// Deserialize `SparseTensor` objects. // -// The generated values will have mean 0 and standard deviation 1. 
+// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where +// the last dimension stores serialized `SparseTensor` objects and the other N +// dimensions (N >= 0) correspond to a batch. The ranks of the original +// `SparseTensor` objects must all match. When the final `SparseTensor` is +// created, its rank is the rank of the incoming `SparseTensor` objects plus N; +// the sparse tensors have been concatenated along new dimensions, one for each +// batch. // -// The outputs are a deterministic function of `shape` and `seed`. +// The output `SparseTensor` object's shape values for the original dimensions +// are the max across the input `SparseTensor` objects' shape values for the +// corresponding dimensions. The new dimensions match the size of the batch. // -// Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// The input `SparseTensor` objects' indices are assumed ordered in +// standard lexicographic order. If this is not the case, after this +// step run `SparseReorder` to restore index ordering. // -// Returns Random values with specified shape. -func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) { +// For example, if the serialized input is a `[2 x 3]` matrix representing two +// original `SparseTensor` objects: +// +// index = [ 0] +// [10] +// [20] +// values = [1, 2, 3] +// shape = [50] +// +// and +// +// index = [ 2] +// [10] +// values = [4, 5] +// shape = [30] +// +// then the final deserialized `SparseTensor` will be: +// +// index = [0 0] +// [0 10] +// [0 20] +// [1 2] +// [1 10] +// values = [1, 2, 3, 4, 5] +// shape = [2 50] +// +// Arguments: +// serialized_sparse: The serialized `SparseTensor` objects. The last dimension +// must have 3 columns. +// dtype: The `dtype` of the serialized `SparseTensor` objects. 
+func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "StatelessRandomNormal", + Type: "DeserializeSparse", Input: []tf.Input{ - shape, seed, + serialized_sparse, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// UnicodeDecodeAttr is an optional argument to UnicodeDecode. -type UnicodeDecodeAttr func(optionalAttr) +// MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad. +type MaxPool3DGradGradAttr func(optionalAttr) -// UnicodeDecodeErrors sets the optional errors attribute to value. +// MaxPool3DGradGradDataFormat sets the optional data_format attribute to value. // -// value: Error handling policy when there is invalid formatting found in the input. -// The value of 'strict' will cause the operation to produce a InvalidArgument -// error on any invalid input formatting. A value of 'replace' (the default) will -// cause the operation to replace any invalid formatting in the input with the -// `replacement_char` codepoint. A value of 'ignore' will cause the operation to -// skip any invalid formatting in the input and produce no corresponding output -// character. -// If not specified, defaults to "replace" -func UnicodeDecodeErrors(value string) UnicodeDecodeAttr { +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. 
+// If not specified, defaults to "NDHWC" +func MaxPool3DGradGradDataFormat(value string) MaxPool3DGradGradAttr { return func(m optionalAttr) { - m["errors"] = value + m["data_format"] = value } } -// UnicodeDecodeReplacementChar sets the optional replacement_char attribute to value. +// Computes second-order gradients of the maxpooling function. // -// value: The replacement character codepoint to be used in place of any invalid -// formatting in the input when `errors='replace'`. Any valid unicode codepoint may -// be used. The default value is the default unicode replacement character is -// 0xFFFD or U+65533.) -// If not specified, defaults to 65533 -func UnicodeDecodeReplacementChar(value int64) UnicodeDecodeAttr { - return func(m optionalAttr) { - m["replacement_char"] = value +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +// +// Returns Gradients of gradients w.r.t. the input to `max_pool`. 
+func MaxPool3DGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPool3DGradGrad", + Input: []tf.Input{ + orig_input, orig_output, grad, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// UnicodeDecodeReplaceControlCharacters sets the optional replace_control_characters attribute to value. +// Conv3DBackpropFilterV2Attr is an optional argument to Conv3DBackpropFilterV2. +type Conv3DBackpropFilterV2Attr func(optionalAttr) + +// Conv3DBackpropFilterV2DataFormat sets the optional data_format attribute to value. // -// value: Whether to replace the C0 control characters (00-1F) with the -// `replacement_char`. Default is false. -// If not specified, defaults to false -func UnicodeDecodeReplaceControlCharacters(value bool) UnicodeDecodeAttr { +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func Conv3DBackpropFilterV2DataFormat(value string) Conv3DBackpropFilterV2Attr { return func(m optionalAttr) { - m["replace_control_characters"] = value + m["data_format"] = value } } -// Decodes each string in `input` into a sequence of Unicode code points. -// -// The character codepoints for all strings are returned using a single vector -// `char_values`, with strings expanded to characters in row-major order. 
-// -// The `row_splits` tensor indicates where the codepoints for -// each input string begin and end within the `char_values` tensor. -// In particular, the values for the `i`th -// string (in row-major order) are stored in the slice -// `[row_splits[i]:row_splits[i+1]]`. Thus: +// Conv3DBackpropFilterV2Dilations sets the optional dilations attribute to value. // -// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th -// character in the `i`th string (in row-major order). -// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th -// string (in row-major order). +// value: 1-D tensor of length 5. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func Conv3DBackpropFilterV2Dilations(value []int64) Conv3DBackpropFilterV2Attr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of 3-D convolution with respect to the filter. // // Arguments: -// input: The text to be decoded. Can have any shape. Note that the output is flattened -// to a vector of char values. -// input_encoding: Text encoding of the input strings. This is any of the encodings supported -// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. -// -// Returns A 1D int32 tensor containing the row splits.A 1D int32 Tensor containing the decoded codepoints. -func UnicodeDecode(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeAttr) (row_splits tf.Output, char_values tf.Output) { +// input: Shape `[batch, depth, rows, cols, in_channels]`. 
+// filter_sizes: An integer vector representing the tensor shape of `filter`, +// where `filter` is a 5-D +// `[filter_depth, filter_height, filter_width, in_channels, out_channels]` +// tensor. +// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, +// out_channels]`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3DBackpropFilterV2(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterV2Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"input_encoding": input_encoding} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "UnicodeDecode", + Type: "Conv3DBackpropFilterV2", Input: []tf.Input{ - input, + input, filter_sizes, out_backprop, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Adds up a SparseTensor and a dense Tensor, using these special rules: -// -// (1) Broadcasts the dense side to have the same shape as the sparse side, if -// eligible; -// (2) Then, only the dense values pointed to by the indices of the SparseTensor -// participate in the cwise addition. +// Execute a sub graph on a remote processor. // -// By these rules, the result is a logical SparseTensor with exactly the same -// indices and shape, but possibly with different non-zero values. The output of -// this Op is the resultant non-zero values. +// The graph specifications(such as graph itself, input tensors and output names) +// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo +// as serialized_remote_fused_graph_execute_info. 
+// The specifications will be passed to a dedicated registered +// remote fused graph executor. The executor will send the graph specifications +// to a remote processor and execute that graph. The execution results +// will be passed to consumer nodes as outputs of this node. // // Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. +// inputs: Arbitrary number of tensors with arbitrary data types // -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { +// serialized_remote_fused_graph_execute_info: Serialized protocol buffer +// of RemoteFusedGraphExecuteInfo which contains graph specifications. +// +// Returns Arbitrary number of tensors with arbitrary data types +func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info} opspec := tf.OpSpec{ - Type: "SparseDenseCwiseAdd", + Type: "RemoteFusedGraphExecute", Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, + tf.OutputList(inputs), }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the complementary error function of `x` element-wise. 
-func Erfc(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "Erfc", - Input: []tf.Input{ - x, - }, + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("RemoteFusedGraphExecute", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + return outputs } -// UnicodeEncodeAttr is an optional argument to UnicodeEncode. -type UnicodeEncodeAttr func(optionalAttr) - -// UnicodeEncodeErrors sets the optional errors attribute to value. -// -// value: Error handling policy when there is invalid formatting found in the input. -// The value of 'strict' will cause the operation to produce a InvalidArgument -// error on any invalid input formatting. A value of 'replace' (the default) will -// cause the operation to replace any invalid formatting in the input with the -// `replacement_char` codepoint. A value of 'ignore' will cause the operation to -// skip any invalid formatting in the input and produce no corresponding output -// character. -// If not specified, defaults to "replace" -func UnicodeEncodeErrors(value string) UnicodeEncodeAttr { - return func(m optionalAttr) { - m["errors"] = value - } -} +// SerializeManySparseAttr is an optional argument to SerializeManySparse. +type SerializeManySparseAttr func(optionalAttr) -// UnicodeEncodeReplacementChar sets the optional replacement_char attribute to value. +// SerializeManySparseOutType sets the optional out_type attribute to value. // -// value: The replacement character codepoint to be used in place of any invalid -// formatting in the input when `errors='replace'`. Any valid unicode codepoint may -// be used. The default value is the default unicode replacement character is -// 0xFFFD (U+65533). 
-// If not specified, defaults to 65533 -func UnicodeEncodeReplacementChar(value int64) UnicodeEncodeAttr { +// value: The `dtype` to use for serialization; the supported types are `string` +// (default) and `variant`. +// If not specified, defaults to DT_STRING +func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr { return func(m optionalAttr) { - m["replacement_char"] = value + m["out_type"] = value } } -// Encode a tensor of ints into unicode strings. -// -// Returns a vector of strings, where `output[i]` is constructed by encoding the -// Unicode codepoints in `input_values[input_splits[i]:input_splits[i+1]]` -// using `output_encoding`. -// -// --- -// -// Example: +// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object. // -// ``` -// input_values = [72, 101, 108, 108, 111, 87, 111, 114, 108, 100] -// input_splits = [0, 5, 10] -// output_encoding = 'UTF-8' +// The `SparseTensor` must have rank `R` greater than 1, and the first dimension +// is treated as the minibatch dimension. Elements of the `SparseTensor` +// must be sorted in increasing order of this first dimension. The serialized +// `SparseTensor` objects going into each row of `serialized_sparse` will have +// rank `R-1`. // -// output = ['Hello', 'World'] -// ``` +// The minibatch size `N` is extracted from `sparse_shape[0]`. // // Arguments: -// input_values: A 1D tensor containing the unicode codepoints that should be encoded. -// input_splits: A 1D tensor specifying how the unicode codepoints should be split into strings. -// In particular, `output[i]` is constructed by encoding the codepoints in the -// slice `input_values[input_splits[i]:input_splits[i+1]]`. -// output_encoding: Unicode encoding of the output strings. Valid encodings are: `"UTF-8", -// "UTF-16-BE", and "UTF-32-BE"`. -// -// Returns The 1-D Tensor of strings encoded from the provided unicode codepoints. 
-func UnicodeEncode(scope *Scope, input_values tf.Output, input_splits tf.Output, output_encoding string, optional ...UnicodeEncodeAttr) (output tf.Output) { +// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. +// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. +func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_encoding": output_encoding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "UnicodeEncode", + Type: "SerializeManySparse", Input: []tf.Input{ - input_values, input_splits, + sparse_indices, sparse_values, sparse_shape, }, Attrs: attrs, } @@ -14248,224 +13338,148 @@ func UnicodeEncode(scope *Scope, input_values tf.Output, input_splits tf.Output, return op.Output(0) } -// Returns the number of tensors in the input tensor list. -// -// input_handle: the input list -// length: the number of tensors in the list -func TensorListLength(scope *Scope, input_handle tf.Output) (length tf.Output) { +// Computes inverse hyperbolic cosine of x element-wise. +func Acosh(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorListLength", + Type: "Acosh", Input: []tf.Input{ - input_handle, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Determine the script codes of a given tensor of Unicode integer code points. -// -// This operation converts Unicode code points to script codes corresponding to -// each code point. Script codes correspond to International Components for -// Unicode (ICU) UScriptCode values. See http://icu-project.org/apiref/icu4c/uscript_8h.html. -// Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. 
Output shape will -// match input shape. +// Computes rectified linear 6 gradients for a Relu6 operation. // // Arguments: -// input: A Tensor of int32 Unicode code points. +// gradients: The backpropagated gradients to the corresponding Relu6 operation. +// features: The features passed as input to the corresponding Relu6 operation, or +// its output; using either one produces the same result. // -// Returns A Tensor of int32 script codes corresponding to each input code point. -func UnicodeScript(scope *Scope, input tf.Output) (output tf.Output) { +// Returns The gradients: +// `gradients * (features > 0) * (features < 6)`. +func Relu6Grad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "UnicodeScript", + Type: "Relu6Grad", Input: []tf.Input{ - input, + gradients, features, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a sequence of numbers. -// -// This operation creates a sequence of numbers that begins at `start` and -// extends by increments of `delta` up to but not including `limit`. -// -// For example: -// -// ``` -// # 'start' is 3 -// # 'limit' is 18 -// # 'delta' is 3 -// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] -// ``` -// -// Arguments: -// start: 0-D (scalar). First entry in the sequence. -// limit: 0-D (scalar). Upper limit of sequence, exclusive. -// delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`. +// Computes natural logarithm of (1 + x) element-wise. // -// Returns 1-D. -func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) { +// I.e., \\(y = \log_e (1 + x)\\). 
+func Log1p(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Range", + Type: "Log1p", Input: []tf.Input{ - start, limit, delta, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey. -type OrderedMapUnstageNoKeyAttr func(optionalAttr) +// ResizeBicubicAttr is an optional argument to ResizeBicubic. +type ResizeBicubicAttr func(optionalAttr) -// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// ResizeBicubicAlignCorners sets the optional align_corners attribute to value. // -// REQUIRES: value >= 0 -func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr { +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func ResizeBicubicAlignCorners(value bool) ResizeBicubicAttr { return func(m optionalAttr) { - m["capacity"] = value + m["align_corners"] = value } } -// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// Resize `images` to `size` using bicubic interpolation. // -// REQUIRES: value >= 0 -func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes and returns the (key, value) element with the smallest +// Input images can be of different types but output images are always float. // -// key from the underlying container. If the underlying container -// does not contain elements, the op will block until it does. -func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { +// Arguments: +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. +// +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func ResizeBicubic(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBicubicAttr) (resized_images tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "OrderedMapUnstageNoKey", + Type: "ResizeBicubic", Input: []tf.Input{ - indices, + images, size, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - key = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("OrderedMapUnstageNoKey", err) - return - } - return key, values -} - -// Returns element-wise integer closest to x. -// -// If the result is midway between two representable values, -// the even representable is chosen. -// For example: -// -// ``` -// rint(-1.5) ==> -2.0 -// rint(0.5000001) ==> 1.0 -// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] 
-// ``` -func Rint(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Rint", - Input: []tf.Input{ - x, - }, - } - op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. -type ResourceApplyMomentumAttr func(optionalAttr) +// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul. +type SparseTensorDenseMatMulAttr func(optionalAttr) -// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. +// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. +// value: Use the adjoint of A in the matrix multiply. If A is complex, this +// is transpose(conj(A)). Otherwise it's transpose(A). // If not specified, defaults to false -func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { +func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["adjoint_a"] = value } } -// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. +// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value. // -// value: If `True`, the tensor passed to compute grad will be -// var - lr * momentum * accum, so in the end, the var you get is actually -// var - lr * momentum * accum. +// value: Use the adjoint of B in the matrix multiply. If B is complex, this +// is transpose(conj(B)). Otherwise it's transpose(B). 
// If not specified, defaults to false -func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { +func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr { return func(m optionalAttr) { - m["use_nesterov"] = value + m["adjoint_b"] = value } } -// Update '*var' according to the momentum scheme. Set use_nesterov = True if you +// Multiply SparseTensor (of rank 2) "A" by dense matrix "B". // -// want to use Nesterov momentum. +// No validity checking is performed on the indices of A. However, the following +// input format is recommended for optimal behavior: // -// accum = accum * momentum + grad -// var -= lr * accum +// if adjoint_a == false: +// A should be sorted in lexicographically increasing order. Use SparseReorder +// if you're not sure. +// if adjoint_a == true: +// A should be sorted in order of increasing dimension 1 (i.e., "column major" +// order instead of "row major" order). // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// momentum: Momentum. Must be a scalar. -// -// Returns the created operation. -func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { +// a_indices: 2-D. The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix. +// a_values: 1-D. The `values` of the `SparseTensor`, size `[nnz]` Vector. +// a_shape: 1-D. The `shape` of the `SparseTensor`, size `[2]` Vector. +// b: 2-D. A dense Matrix. +func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) { if scope.Err() != nil { return } @@ -14474,117 +13488,114 @@ func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf. 
a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyMomentum", + Type: "SparseTensorDenseMatMul", Input: []tf.Input{ - var_, accum, lr, grad, momentum, + a_indices, a_values, a_shape, b, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// SubstrAttr is an optional argument to Substr. -type SubstrAttr func(optionalAttr) +// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg. +type DecodeAndCropJpegAttr func(optionalAttr) -// SubstrUnit sets the optional unit attribute to value. +// DecodeAndCropJpegChannels sets the optional channels attribute to value. // -// value: The unit that is used to create the substring. One of: `"BYTE"` (for -// defining position and length by bytes) or `"UTF8_CHAR"` (for the UTF-8 -// encoded Unicode code points). The default is `"BYTE"`. Results are undefined if -// `unit=UTF8_CHAR` and the `input` strings do not contain structurally valid -// UTF-8. -// If not specified, defaults to "BYTE" -func SubstrUnit(value string) SubstrAttr { +// value: Number of color channels for the decoded image. +// If not specified, defaults to 0 +func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr { return func(m optionalAttr) { - m["unit"] = value + m["channels"] = value } } -// Return substrings from `Tensor` of strings. -// -// For each string in the input `Tensor`, creates a substring starting at index -// `pos` with a total length of `len`. -// -// If `len` defines a substring that would extend beyond the length of the input -// string, then as many characters as possible are used. -// -// A negative `pos` indicates distance within the string backwards from the end. -// -// If `pos` specifies an index which is out of range for any of the input strings, -// then an `InvalidArgumentError` is thrown. -// -// `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on -// Op creation. 
-// -// *NOTE*: `Substr` supports broadcasting up to two dimensions. More about -// broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -// -// --- -// -// Examples +// DecodeAndCropJpegRatio sets the optional ratio attribute to value. // -// Using scalar `pos` and `len`: +// value: Downscaling ratio. +// If not specified, defaults to 1 +func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["ratio"] = value + } +} + +// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. // -// ```python -// input = [b'Hello', b'World'] -// position = 1 -// length = 3 +// value: If true use a slower but nicer upscaling of the +// chroma planes (yuv420/422 only). +// If not specified, defaults to true +func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["fancy_upscaling"] = value + } +} + +// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. // -// output = [b'ell', b'orl'] -// ``` +// value: If true try to recover an image from truncated input. +// If not specified, defaults to false +func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["try_recover_truncated"] = value + } +} + +// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. // -// Using `pos` and `len` with same shape as `input`: +// value: The minimum required fraction of lines before a truncated +// input is accepted. +// If not specified, defaults to 1 +func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["acceptable_fraction"] = value + } +} + +// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value. 
// -// ```python -// input = [[b'ten', b'eleven', b'twelve'], -// [b'thirteen', b'fourteen', b'fifteen'], -// [b'sixteen', b'seventeen', b'eighteen']] -// position = [[1, 2, 3], -// [1, 2, 3], -// [1, 2, 3]] -// length = [[2, 3, 4], -// [4, 3, 2], -// [5, 5, 5]] +// value: string specifying a hint about the algorithm used for +// decompression. Defaults to "" which maps to a system-specific +// default. Currently valid values are ["INTEGER_FAST", +// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal +// jpeg library changes to a version that does not have that specific +// option.) +// If not specified, defaults to "" +func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr { + return func(m optionalAttr) { + m["dct_method"] = value + } +} + +// Decode and Crop a JPEG-encoded image to a uint8 tensor. // -// output = [[b'en', b'eve', b'lve'], -// [b'hirt', b'urt', b'te'], -// [b'ixtee', b'vente', b'hteen']] -// ``` +// The attr `channels` indicates the desired number of color channels for the +// decoded image. // -// Broadcasting `pos` and `len` onto `input`: +// Accepted values are: // -// ``` -// input = [[b'ten', b'eleven', b'twelve'], -// [b'thirteen', b'fourteen', b'fifteen'], -// [b'sixteen', b'seventeen', b'eighteen'], -// [b'nineteen', b'twenty', b'twentyone']] -// position = [1, 2, 3] -// length = [1, 2, 3] +// * 0: Use the number of channels in the JPEG-encoded image. +// * 1: output a grayscale image. +// * 3: output an RGB image. // -// output = [[b'e', b'ev', b'lve'], -// [b'h', b'ur', b'tee'], -// [b'i', b've', b'hte'], -// [b'i', b'en', b'nty']] -// ``` +// If needed, the JPEG-encoded image is transformed to match the requested number +// of color channels. // -// Broadcasting `input` onto `pos` and `len`: +// The attr `ratio` allows downscaling the image by an integer factor during +// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than +// downscaling the image later. 
// -// ``` -// input = b'thirteen' -// position = [1, 5, 7] -// length = [3, 2, 1] // -// output = [b'hir', b'ee', b'n'] -// ``` +// It is equivalent to a combination of decode and crop, but much faster by only +// decoding partial jpeg image. // // Arguments: -// input: Tensor of strings -// pos: Scalar defining the position of first character in each substring -// len: Scalar defining the number of characters to include in each substring +// contents: 0-D. The JPEG-encoded image. +// crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. // -// Returns Tensor of substrings -func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output, optional ...SubstrAttr) (output tf.Output) { +// Returns 3-D with shape `[height, width, channels]`.. +func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) { if scope.Err() != nil { return } @@ -14593,9 +13604,9 @@ func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output, optiona a(attrs) } opspec := tf.OpSpec{ - Type: "Substr", + Type: "DecodeAndCropJpeg", Input: []tf.Input{ - input, pos, len, + contents, crop_window, }, Attrs: attrs, } @@ -14603,294 +13614,479 @@ func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output, optiona return op.Output(0) } -// Exits the current frame to its parent frame. +// Adds two `SparseTensor` objects to produce another `SparseTensor`. // -// Exit makes its input `data` available to the parent frame. +// The input `SparseTensor` objects' indices are assumed ordered in standard +// lexicographic order. If this is not the case, before this step run +// `SparseReorder` to restore index ordering. // -// Arguments: -// data: The tensor to be made available to the parent frame. 
+// By default, if two values sum to zero at some index, the output `SparseTensor` +// would still include that particular location in its index, storing a zero in the +// corresponding value slot. To override this, callers can specify `thresh`, +// indicating that if the sum has a magnitude strictly smaller than `thresh`, its +// corresponding value and index would then not be included. In particular, +// `thresh == 0` (default) means everything is kept and actual thresholding happens +// only for a positive value. // -// Returns The same tensor as `data`. -func Exit(scope *Scope, data tf.Output) (output tf.Output) { +// In the following shapes, `nnz` is the count after taking `thresh` into account. +// +// Arguments: +// a_indices: 2-D. The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. +// a_values: 1-D. The `values` of the first `SparseTensor`, size `[nnz]` Vector. +// a_shape: 1-D. The `shape` of the first `SparseTensor`, size `[ndims]` Vector. +// b_indices: 2-D. The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix. +// b_values: 1-D. The `values` of the second `SparseTensor`, size `[nnz]` Vector. +// b_shape: 1-D. The `shape` of the second `SparseTensor`, size `[ndims]` Vector. +// thresh: 0-D. The magnitude threshold that determines if an output value/index +// pair takes space. +func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Exit", + Type: "SparseAdd", Input: []tf.Input{ - data, + a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Produce a string tensor that encodes the state of a Reader. 
+// EnqueueTPUEmbeddingSparseTensorBatchAttr is an optional argument to EnqueueTPUEmbeddingSparseTensorBatch. +type EnqueueTPUEmbeddingSparseTensorBatchAttr func(optionalAttr) + +// EnqueueTPUEmbeddingSparseTensorBatchDeviceOrdinal sets the optional device_ordinal attribute to value. // -// Not all Readers support being serialized, so this can produce an -// Unimplemented error. +// value: The TPU device to use. Should be >= 0 and less than the number +// of TPU cores in the task on which the node is placed. +// If not specified, defaults to -1 +func EnqueueTPUEmbeddingSparseTensorBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingSparseTensorBatchAttr { + return func(m optionalAttr) { + m["device_ordinal"] = value + } +} + +// EnqueueTPUEmbeddingSparseTensorBatchCombiners sets the optional combiners attribute to value. // -// Arguments: -// reader_handle: Handle to a Reader. -func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) { +// value: A list of string scalars, one for each embedding table that specify +// how to normalize the embedding activations after weighted summation. +// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have +// the sum of the weights be 0 for 'mean' or the sum of the squared weights be +// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for +// all tables. +// If not specified, defaults to <> +func EnqueueTPUEmbeddingSparseTensorBatchCombiners(value []string) EnqueueTPUEmbeddingSparseTensorBatchAttr { + return func(m optionalAttr) { + m["combiners"] = value + } +} + +// Eases the porting of code that uses tf.nn.embedding_lookup_sparse(). +// +// sample_indices[i], embedding_indices[i] and aggregation_weights[i] correspond +// to the ith feature. table_ids[i] indicates which embedding table to look up ith +// feature. 
+// +// The tensors at corresponding positions in the three input lists (sample_indices, +// embedding_indices and aggregation_weights) must have the same shape, i.e. rank 1 +// with dim_size() equal to the total number of lookups into the table described by +// the corresponding feature. +// +// Arguments: +// sample_indices: A list of rank 1 Tensors specifying the training example to +// which the corresponding embedding_indices and aggregation_weights values +// belong. It corresponds to sp_ids.indices[:,0] in embedding_lookup_sparse(). +// embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. +// It corresponds to sp_ids.values in embedding_lookup_sparse(). +// aggregation_weights: A list of rank 1 Tensors containing per training example +// aggregation weights. It corresponds to sp_weights.values in +// embedding_lookup_sparse(). +// mode_override: A string input that overrides the mode specified in the +// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', +// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set +// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. +// table_ids: A list of integers specifying the identifier of the embedding table +// (offset of TableDescriptor in the TPUEmbeddingConfiguration) to lookup the +// corresponding input. The ith input is looked up using table_ids[i]. The size +// of the table_ids list must be equal to that of sample_indices, +// embedding_indices and aggregation_weights. +// +// Returns the created operation. 
+func EnqueueTPUEmbeddingSparseTensorBatch(scope *Scope, sample_indices []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, table_ids []int64, optional ...EnqueueTPUEmbeddingSparseTensorBatchAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"table_ids": table_ids} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ReaderSerializeStateV2", + Type: "EnqueueTPUEmbeddingSparseTensorBatch", Input: []tf.Input{ - reader_handle, + tf.OutputList(sample_indices), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override, }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Concatenates quantized tensors along one dimension. +// The gradient operator for the SparseAdd op. +// +// The SparseAdd op calculates A + B, where A, B, and the sum are all represented +// as `SparseTensor` objects. This op takes in the upstream gradient w.r.t. +// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty +// values of A and B. // // Arguments: -// concat_dim: 0-D. The dimension along which to concatenate. Must be in the -// range [0, rank(values)). -// values: The `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// input_mins: The minimum scalar values for each of the input tensors. -// input_maxes: The maximum scalar values for each of the input tensors. +// backprop_val_grad: 1-D with shape `[nnz(sum)]`. The gradient with respect to +// the non-empty values of the sum. +// a_indices: 2-D. The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`. +// b_indices: 2-D. The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`. +// sum_indices: 2-D. The `indices` of the sum `SparseTensor`, size +// `[nnz(sum), ndims]`. 
// -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes.The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. -func QuantizedConcat(scope *Scope, concat_dim tf.Output, values []tf.Output, input_mins []tf.Output, input_maxes []tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// Returns 1-D with shape `[nnz(A)]`. The gradient with respect to the +// non-empty values of A.1-D with shape `[nnz(B)]`. The gradient with respect to the +// non-empty values of B. +func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "QuantizedConcat", + Type: "SparseAddGrad", Input: []tf.Input{ - concat_dim, tf.OutputList(values), tf.OutputList(input_mins), tf.OutputList(input_maxes), + backprop_val_grad, a_indices, b_indices, sum_indices, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0), op.Output(1) } -// Slice a `SparseTensor` based on the `start` and `size`. -// -// For example, if the input is -// -// input_tensor = shape = [2, 7] -// [ a d e ] -// [b c ] -// -// Graphically the output tensors are: +// This op consumes a lock created by `MutexLock`. // -// sparse_slice([0, 0], [2, 4]) = shape = [2, 4] -// [ a ] -// [b c ] +// This op exists to consume a tensor created by `MutexLock` (other than +// direct control dependencies). It should be the only that consumes the tensor, +// and will raise an error if it is not. Its only purpose is to keep the +// mutex lock tensor alive until it is consumed by this op. 
// -// sparse_slice([0, 4], [2, 3]) = shape = [2, 3] -// [ d e ] -// [ ] +// **NOTE**: This operation must run on the same device as its input. This may +// be enforced via the `colocate_with` mechanism. // // Arguments: -// indices: 2-D tensor represents the indices of the sparse tensor. -// values: 1-D tensor represents the values of the sparse tensor. -// shape: 1-D. tensor represents the shape of the sparse tensor. -// start: 1-D. tensor represents the start of the slice. -// size: 1-D. tensor represents the size of the slice. -// output indices: A list of 1-D tensors represents the indices of the output -// sparse tensors. +// mutex_lock: A tensor returned by `MutexLock`. // -// Returns A list of 1-D tensors represents the values of the output sparse -// tensors.A list of 1-D tensors represents the shape of the output sparse -// tensors. -func SparseSlice(scope *Scope, indices tf.Output, values tf.Output, shape tf.Output, start tf.Output, size tf.Output) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// Returns the created operation. +func ConsumeMutexLock(scope *Scope, mutex_lock tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseSlice", + Type: "ConsumeMutexLock", Input: []tf.Input{ - indices, values, shape, start, size, + mutex_lock, }, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// Reduces sparse updates into the variable referenced by `resource` using the `min` operation. +// ResourceScatterNdAddAttr is an optional argument to ResourceScatterNdAdd. +type ResourceScatterNdAddAttr func(optionalAttr) + +// ResourceScatterNdAddUseLocking sets the optional use_locking attribute to value. // -// This operation computes +// value: An optional bool. Defaults to True. If True, the assignment will +// be protected by a lock; otherwise the behavior is undefined, +// but may exhibit less contention. 
+// If not specified, defaults to true +func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Applies sparse addition to individual values or slices in a Variable. // -// # Scalar indices -// ref[indices, ...] = min(ref[indices, ...], updates[...]) +// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. // -// # Vector indices (for each i) -// ref[indices[i], ...] = min(ref[indices[i], ...], updates[i, ...]) +// `indices` must be integer tensor, containing indices into `ref`. +// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. // -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] = min(ref[indices[i, ..., j], ...], updates[i, ..., j, ...]) +// The innermost dimension of `indices` (with length `K`) corresponds to +// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th +// dimension of `ref`. // -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions are combined. +// `updates` is `Tensor` of rank `Q-1+P-K` with shape: // -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// ``` +// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]] +// ``` // -//
-// -//
+// For example, say we want to add 4 scattered elements to a rank-1 tensor to +// 8 elements. In Python, that addition would look like this: +// +// ```python +// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True) +// indices = tf.constant([[4], [3], [1], [7]]) +// updates = tf.constant([9, 10, 11, 12]) +// add = tf.scatter_nd_add(ref, indices, updates) +// with tf.Session() as sess: +// print sess.run(add) +// ``` +// +// The resulting update to ref would look like this: +// +// [1, 13, 3, 14, 14, 6, 7, 20] +// +// See `tf.scatter_nd` for more details about how to make updates to +// slices. // // Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. +// ref: A resource handle. Must be from a VarHandleOp. +// indices: A Tensor. Must be one of the following types: int32, int64. +// A tensor of indices into ref. +// updates: A Tensor. Must have the same type as ref. A tensor of +// values to add to ref. // // Returns the created operation. -func ResourceScatterMin(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { +func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdAddAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ResourceScatterMin", + Type: "ResourceScatterNdAdd", Input: []tf.Input{ - resource, indices, updates, + ref, indices, updates, }, + Attrs: attrs, } return scope.AddOperation(opspec) } -// Reshapes a quantized tensor as per the Reshape op. +// Replaces the contents of the table with the specified keys and values. // -// ``` +// The tensor `keys` must be of the same type as the keys of the table. +// The tensor `values` must be of the type of the table values. 
// // Arguments: +// table_handle: Handle to the table. +// keys: Any shape. Keys to look up. +// values: Values to associate with keys. // -// shape: Defines the shape of the output tensor. -// input_min: The minimum value of the input. -// input_max: The maximum value of the input. -// -// Returns This value is copied from input_min.This value is copied from input_max. -func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// Returns the created operation. +func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "QuantizedReshape", + Type: "LookupTableImportV2", Input: []tf.Input{ - tensor, shape, input_min, input_max, + table_handle, keys, values, }, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// StringSplitAttr is an optional argument to StringSplit. -type StringSplitAttr func(optionalAttr) - -// StringSplitSkipEmpty sets the optional skip_empty attribute to value. +// Extract `patches` from `images` and put them in the "depth" output dimension. // -// value: A `bool`. If `True`, skip the empty strings from the result. -// If not specified, defaults to true -func StringSplitSkipEmpty(value bool) StringSplitAttr { - return func(m optionalAttr) { - m["skip_empty"] = value +// Arguments: +// images: 4-D Tensor with shape `[batch, in_rows, in_cols, depth]`. +// ksizes: The size of the sliding window for each dimension of `images`. +// strides: 1-D of length 4. How far the centers of two consecutive patches are in +// the images. Must be: `[1, stride_rows, stride_cols, 1]`. +// rates: 1-D of length 4. Must be: `[1, rate_rows, rate_cols, 1]`. This is the +// input stride, specifying how far two consecutive patch samples are in the +// input. 
Equivalent to extracting patches with +// `patch_sizes_eff = patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by +// subsampling them spatially by a factor of `rates`. This is equivalent to +// `rate` in dilated (a.k.a. Atrous) convolutions. +// padding: The type of padding algorithm to use. +// +// We specify the size-related attributes as: +// +// ```python +// ksizes = [1, ksize_rows, ksize_cols, 1] +// strides = [1, strides_rows, strides_cols, 1] +// rates = [1, rates_rows, rates_cols, 1] +// ``` +// +// Returns 4-D Tensor with shape `[batch, out_rows, out_cols, ksize_rows * +// ksize_cols * depth]` containing image patches with size +// `ksize_rows x ksize_cols x depth` vectorized in the "depth" dimension. Note +// `out_rows` and `out_cols` are the dimensions of the output patches. +func ExtractImagePatches(scope *Scope, images tf.Output, ksizes []int64, strides []int64, rates []int64, padding string) (patches tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksizes": ksizes, "strides": strides, "rates": rates, "padding": padding} + opspec := tf.OpSpec{ + Type: "ExtractImagePatches", + Input: []tf.Input{ + images, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Split elements of `input` based on `delimiter` into a `SparseTensor`. -// -// Let N be the size of source (typically N will be the batch size). Split each -// element of `input` based on `delimiter` and return a `SparseTensor` -// containing the splitted tokens. Empty tokens are ignored. -// -// `delimiter` can be empty, or a string of split characters. If `delimiter` is an -// empty string, each element of `input` is split into individual single-byte -// character strings, including splitting of UTF-8 multibyte sequences. Otherwise -// every character of `delimiter` is a potential split point. +// Computes the mean along sparse segments of a tensor. 
// -// For example: -// N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output -// will be +// See `tf.sparse.segment_sum` for usage examples. // -// indices = [0, 0; -// 0, 1; -// 1, 0; -// 1, 1; -// 1, 2] -// shape = [2, 3] -// values = ['hello', 'world', 'a', 'b', 'c'] +// Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first +// dimension, selecting a subset of dimension 0, specified by `indices`. // // Arguments: -// input: 1-D. Strings to split. -// delimiter: 0-D. Delimiter characters (bytes), or empty string. // -// Returns A dense matrix of int64 representing the indices of the sparse tensor.A vector of strings corresponding to the splited values.a length-2 vector of int64 representing the shape of the sparse -// tensor, where the first value is N and the second value is the maximum number -// of tokens in a single input entry. -func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) { +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "StringSplit", + Type: "SparseSegmentMean", Input: []tf.Input{ - input, delimiter, + data, indices, segment_ids, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum. 
-type ResourceSparseApplyMomentumAttr func(optionalAttr) +// Deserializes a serialized tree ensemble config and replaces current tree +// +// ensemble. +// +// Arguments: +// tree_ensemble_handle: Handle to the tree ensemble. +// stamp_token: Token to use as the new value of the resource stamp. +// tree_ensemble_serialized: Serialized proto of the ensemble. +// +// Returns the created operation. +func BoostedTreesDeserializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BoostedTreesDeserializeEnsemble", + Input: []tf.Input{ + tree_ensemble_handle, stamp_token, tree_ensemble_serialized, + }, + } + return scope.AddOperation(opspec) +} -// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value. +// Transforms a tf.Example proto (as a string) into typed tensors. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr { - return func(m optionalAttr) { - m["use_locking"] = value +// Arguments: +// serialized: A vector containing a batch of binary serialized Example protos. +// dense_defaults: A list of Tensors (some may be empty), whose length matches +// the length of `dense_keys`. dense_defaults[j] provides default values +// when the example's feature_map lacks dense_key[j]. If an empty Tensor is +// provided for dense_defaults[j], then the Feature dense_keys[j] is required. +// The input type is inferred from dense_defaults[j], even when it's empty. +// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, +// then the shape of dense_defaults[j] must match that of dense_shapes[j]. 
+// If dense_shapes[j] has an undefined major dimension (variable strides dense +// feature), dense_defaults[j] must contain a single element: +// the padding element. +// num_sparse: The number of sparse features to be parsed from the example. This +// must match the lengths of `sparse_keys` and `sparse_types`. +// sparse_keys: A list of `num_sparse` strings. +// The keys expected in the Examples' features associated with sparse values. +// dense_keys: The keys expected in the Examples' features associated with dense +// values. +// sparse_types: A list of `num_sparse` types; the data types of data in each +// Feature given in sparse_keys. +// Currently the ParseSingleExample op supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// dense_shapes: The shapes of data in each Feature given in dense_keys. +// The length of this list must match the length of `dense_keys`. The +// number of elements in the Feature corresponding to dense_key[j] must +// always equal dense_shapes[j].NumEntries(). If dense_shapes[j] == +// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j] +// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1, +// ..., DN), the shape of the output Tensor dense_values[j] will be (M, +// D1, .., DN), where M is the number of blocks of elements of length +// D1 * .... * DN, in the input. 
+func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes} + opspec := tf.OpSpec{ + Type: "ParseSingleExample", + Input: []tf.Input{ + serialized, tf.OutputList(dense_defaults), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { + scope.UpdateErr("ParseSingleExample", err) + return + } + if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { + scope.UpdateErr("ParseSingleExample", err) + return + } + if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { + scope.UpdateErr("ParseSingleExample", err) + return + } + if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { + scope.UpdateErr("ParseSingleExample", err) + return } + return sparse_indices, sparse_values, sparse_shapes, dense_values } -// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. +// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2. +type WholeFileReaderV2Attr func(optionalAttr) + +// WholeFileReaderV2Container sets the optional container attribute to value. // -// value: If `True`, the tensor passed to compute grad will be -// var - lr * momentum * accum, so in the end, the var you get is actually -// var - lr * momentum * accum. 
-// If not specified, defaults to false -func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr { +// value: If non-empty, this reader is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr { return func(m optionalAttr) { - m["use_nesterov"] = value + m["container"] = value } } -// Update relevant entries in '*var' and '*accum' according to the momentum scheme. -// -// Set use_nesterov = True if you want to use Nesterov momentum. -// -// That is for rows we have grad for, we update var and accum as follows: +// WholeFileReaderV2SharedName sets the optional shared_name attribute to value. // -// accum = accum * momentum + grad -// var -= lr * accum +// value: If non-empty, this reader is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. +// If not specified, defaults to "" +func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A Reader that outputs the entire contents of a file as a value. // -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// momentum: Momentum. Must be a scalar. +// To use, enqueue filenames in a Queue. The output of ReaderRead will +// be a filename (key) and the contents of that file (value). // -// Returns the created operation. -func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) { +// Returns The handle to reference the Reader. 
+func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) { if scope.Err() != nil { return } @@ -14899,137 +14095,166 @@ func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyMomentum", - Input: []tf.Input{ - var_, accum, lr, grad, indices, momentum, - }, + Type: "WholeFileReaderV2", + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Returns the complex conjugate of a complex number. +// Says whether the targets are in the top `K` predictions. // -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// complex numbers that are the complex conjugate of each element in `input`. The -// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the -// real part and *b* is the imaginary part. +// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the +// prediction for the target class is among the top `k` predictions among +// all predictions for example `i`. Note that the behavior of `InTopK` differs +// from the `TopK` op in its handling of ties; if multiple classes have the +// same prediction value and straddle the top-`k` boundary, all of those +// classes are considered to be in the top `k`. // -// The complex conjugate returned by this operation is of the form \\(a - bj\\). 
+// More formally, let // -// For example: +// \\(predictions_i\\) be the predictions for all classes for example `i`, +// \\(targets_i\\) be the target class for example `i`, +// \\(out_i\\) be the output for example `i`, // -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] -// ``` -func Conj(scope *Scope, input tf.Output) (output tf.Output) { +// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ +// +// Arguments: +// predictions: A `batch_size` x `classes` tensor. +// targets: A `batch_size` vector of class ids. +// k: Number of top elements to look at for computing precision. +// +// Returns Computed Precision at `k` as a `bool Tensor`. +func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"k": k} opspec := tf.OpSpec{ - Type: "Conj", + Type: "InTopK", Input: []tf.Input{ - input, + predictions, targets, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// CudnnRNNBackpropAttr is an optional argument to CudnnRNNBackprop. -type CudnnRNNBackpropAttr func(optionalAttr) +// RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingAdagradParametersGradAccumDebug. +type RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr func(optionalAttr) -// CudnnRNNBackpropRnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNBackpropRnnMode(value string) CudnnRNNBackpropAttr { +// RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value. 
+// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["rnn_mode"] = value + m["table_id"] = value } } -// CudnnRNNBackpropInputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNBackpropInputMode(value string) CudnnRNNBackpropAttr { +// RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingAdagradParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["input_mode"] = value + m["table_name"] = value } } -// CudnnRNNBackpropDirection sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNBackpropDirection(value string) CudnnRNNBackpropAttr { +// Retrieve Adagrad embedding parameters with debug support. +// +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns Parameter parameters updated by the Adagrad optimization algorithm.Parameter accumulators updated by the Adagrad optimization algorithm.Parameter gradient_accumulators updated by the Adagrad optimization algorithm. 
+func RetrieveTPUEmbeddingAdagradParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdagradParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RetrieveTPUEmbeddingAdagradParametersGradAccumDebug", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Serializes the tree handle to a proto +// +// Arguments: +// tree_handle: Handle to the tree resource to be serialized. +// +// Returns Serialied proto string of the tree resource. +func TensorForestTreeSerialize(scope *Scope, tree_handle tf.Output) (tree_config tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorForestTreeSerialize", + Input: []tf.Input{ + tree_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// SparseMatMulAttr is an optional argument to SparseMatMul. +type SparseMatMulAttr func(optionalAttr) + +// SparseMatMulTransposeA sets the optional transpose_a attribute to value. +// If not specified, defaults to false +func SparseMatMulTransposeA(value bool) SparseMatMulAttr { return func(m optionalAttr) { - m["direction"] = value + m["transpose_a"] = value } } -// CudnnRNNBackpropDropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropDropout(value float32) CudnnRNNBackpropAttr { +// SparseMatMulTransposeB sets the optional transpose_b attribute to value. +// If not specified, defaults to false +func SparseMatMulTransposeB(value bool) SparseMatMulAttr { return func(m optionalAttr) { - m["dropout"] = value + m["transpose_b"] = value } } -// CudnnRNNBackpropSeed sets the optional seed attribute to value. 
-// If not specified, defaults to 0 -func CudnnRNNBackpropSeed(value int64) CudnnRNNBackpropAttr { +// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value. +// If not specified, defaults to false +func SparseMatMulAIsSparse(value bool) SparseMatMulAttr { return func(m optionalAttr) { - m["seed"] = value + m["a_is_sparse"] = value } } -// CudnnRNNBackpropSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNBackpropSeed2(value int64) CudnnRNNBackpropAttr { +// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value. +// If not specified, defaults to false +func SparseMatMulBIsSparse(value bool) SparseMatMulAttr { return func(m optionalAttr) { - m["seed2"] = value + m["b_is_sparse"] = value } } -// Backprop step of CudnnRNN. +// Multiply matrix "a" by matrix "b". // -// Compute the backprop of both data and weights in a RNN. +// The inputs must be two-dimensional matrices and the inner dimension of "a" must +// match the outer dimension of "b". Both "a" and "b" must be `Tensor`s not +// `SparseTensor`s. This op is optimized for the case where at least one of "a" or +// "b" is sparse, in the sense that they have a large proportion of zero values. +// The breakeven for using this versus a dense matrix multiply on one platform was +// 30% zero values in the sparse matrix. // -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// the actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. Should be -// "unidirectional" or "bidirectional". -// dropout: Dropout probability. When set to 0., dropout is disabled. -// seed: The 1st part of a seed to initialize dropout. 
-// seed2: The 2nd part of a seed to initialize dropout. -// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size]. -// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size, -// num_units]. -// input_c: For LSTM, a 3-D tensor with the shape of -// [num_layer * dir, batch, num_units]. For other models, it is ignored. -// params: A 1-D tensor that contains the weights and biases in an opaque layout. -// The size must be created through CudnnRNNParamsSize, and initialized -// separately. Note that they might not be compatible across different -// generations. So it is a good idea to save and restore -// output: A 3-D tensor with the shape of [seq_length, batch_size, -// dir * num_units]. -// output_h: The same shape has input_h. -// output_c: The same shape as input_c for LSTM. An empty tensor for other models. -// output_backprop: A 3-D tensor with the same shape as output in the forward pass. -// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward -// pass. -// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward -// pass. -// reserve_space: The same reserve_space produced in for forward operation. -// input_backprop: The backprop to input in the forward pass. Has the same shape -// as input. -// input_h_backprop: The backprop to input_h in the forward pass. Has the same -// shape as input_h. -// input_c_backprop: The backprop to input_c in the forward pass. Has the same -// shape as input_c. -// params_backprop: The backprop to the params buffer in the forward pass. Has the -// same shape as params. 
-func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, optional ...CudnnRNNBackpropAttr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) { +// The gradient computation of this operation will only take advantage of sparsity +// in the input gradient when that gradient comes from a Relu. +func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { if scope.Err() != nil { return } @@ -15038,352 +14263,320 @@ func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c a(attrs) } opspec := tf.OpSpec{ - Type: "CudnnRNNBackprop", + Type: "SparseMatMul", Input: []tf.Input{ - input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space, + a, b, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) + return op.Output(0) } -// Encode audio data using the WAV file format. -// -// This operation will generate a string suitable to be saved out to create a .wav -// audio file. It will be encoded in the 16-bit PCM format. It takes in float -// values in the range -1.0f to 1.0f, and any outside that value will be clamped to -// that range. -// -// `audio` is a 2-D float Tensor of shape `[length, channels]`. -// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100). -// -// Arguments: -// audio: 2-D with shape `[length, channels]`. -// sample_rate: Scalar containing the sample frequency. +// ExperimentalThreadPoolHandleAttr is an optional argument to ExperimentalThreadPoolHandle. 
+type ExperimentalThreadPoolHandleAttr func(optionalAttr) + +// ExperimentalThreadPoolHandleMaxIntraOpParallelism sets the optional max_intra_op_parallelism attribute to value. // -// Returns 0-D. WAV-encoded file contents. -func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) { - if scope.Err() != nil { - return +// value: The maximum degree of parallelism to use within operations that execute on this +// threadpool. +// If not specified, defaults to 1 +func ExperimentalThreadPoolHandleMaxIntraOpParallelism(value int64) ExperimentalThreadPoolHandleAttr { + return func(m optionalAttr) { + m["max_intra_op_parallelism"] = value } - opspec := tf.OpSpec{ - Type: "EncodeWav", - Input: []tf.Input{ - audio, sample_rate, - }, +} + +// ExperimentalThreadPoolHandleContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func ExperimentalThreadPoolHandleContainer(value string) ExperimentalThreadPoolHandleAttr { + return func(m optionalAttr) { + m["container"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes atan of x element-wise. -func Atan(scope *Scope, x tf.Output) (y tf.Output) { +// ExperimentalThreadPoolHandleSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func ExperimentalThreadPoolHandleSharedName(value string) ExperimentalThreadPoolHandleAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a dataset that uses a custom thread pool to compute `input_dataset`. +// +// Arguments: +// num_threads: The number of threads in the thread pool. +// display_name: A human-readable name for the threads that may be visible in some +// visualizations. +// threadpool. +// +// Returns A resource that can be consumed by one or more ExperimentalThreadPoolDataset +// ops. 
+func ExperimentalThreadPoolHandle(scope *Scope, num_threads int64, display_name string, optional ...ExperimentalThreadPoolHandleAttr) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_threads": num_threads, "display_name": display_name} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Atan", - Input: []tf.Input{ - x, - }, + Type: "ExperimentalThreadPoolHandle", + + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax. -type ResourceApplyAdaMaxAttr func(optionalAttr) +// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug. +type LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr func(optionalAttr) -// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value. +// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: If `True`, updating of the var, m, and v tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["table_id"] = value } } -// Update '*var' according to the AdaMax algorithm. +// LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value. 
+// If not specified, defaults to "" +func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load proximal Adagrad embedding parameters with debug support. // -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g -// v_t <- max(beta2 * v_{t-1}, abs(g)) -// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon) +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // // Arguments: -// var_: Should be from a Variable(). -// m: Should be from a Variable(). -// v: Should be from a Variable(). -// beta1_power: Must be a scalar. -// lr: Scaling factor. Must be a scalar. -// beta1: Momentum factor. Must be a scalar. -// beta2: Momentum factor. Must be a scalar. -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. +// parameters: Value of parameters used in the proximal Adagrad optimization algorithm. +// accumulators: Value of accumulators used in the proximal Adagrad optimization algorithm. +// gradient_accumulators: Value of gradient_accumulators used in the proximal Adagrad optimization algorithm. +// +// // // Returns the created operation. 
-func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) { +func LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAdaMax", + Type: "LoadTPUEmbeddingProximalAdagradParametersGradAccumDebug", Input: []tf.Input{ - var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad, + parameters, accumulators, gradient_accumulators, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// AssertAttr is an optional argument to Assert. -type AssertAttr func(optionalAttr) +// LoadTPUEmbeddingProximalAdagradParametersAttr is an optional argument to LoadTPUEmbeddingProximalAdagradParameters. +type LoadTPUEmbeddingProximalAdagradParametersAttr func(optionalAttr) -// AssertSummarize sets the optional summarize attribute to value. +// LoadTPUEmbeddingProximalAdagradParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: Print this many entries of each tensor. -// If not specified, defaults to 3 -func AssertSummarize(value int64) AssertAttr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingProximalAdagradParametersTableId(value int64) LoadTPUEmbeddingProximalAdagradParametersAttr { return func(m optionalAttr) { - m["summarize"] = value + m["table_id"] = value } } -// Asserts that the given condition is true. 
+// LoadTPUEmbeddingProximalAdagradParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingProximalAdagradParametersTableName(value string) LoadTPUEmbeddingProximalAdagradParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load proximal Adagrad embedding parameters. // -// If `condition` evaluates to false, print the list of tensors in `data`. -// `summarize` determines how many entries of the tensors to print. +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // // Arguments: -// condition: The condition to evaluate. -// data: The tensors to print out when condition is false. +// parameters: Value of parameters used in the proximal Adagrad optimization algorithm. +// accumulators: Value of accumulators used in the proximal Adagrad optimization algorithm. +// +// // // Returns the created operation. 
-func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { +func LoadTPUEmbeddingProximalAdagradParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingProximalAdagradParametersAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Assert", + Type: "LoadTPUEmbeddingProximalAdagradParameters", Input: []tf.Input{ - condition, tf.OutputList(data), + parameters, accumulators, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Split a `SparseTensor` into `num_split` tensors along one dimension. -// -// If the `shape[split_dim]` is not an integer multiple of `num_split`. Slices -// `[0 : shape[split_dim] % num_split]` gets one extra dimension. -// For example, if `split_dim = 1` and `num_split = 2` and the input is -// -// input_tensor = shape = [2, 7] -// [ a d e ] -// [b c ] -// -// Graphically the output tensors are: -// -// output_tensor[0] = shape = [2, 4] -// [ a ] -// [b c ] -// -// output_tensor[1] = shape = [2, 3] -// [ d e ] -// [ ] +// Get the current size of the TensorArray. // // Arguments: -// split_dim: 0-D. The dimension along which to split. Must be in the range -// `[0, rank(shape))`. -// indices: 2-D tensor represents the indices of the sparse tensor. -// values: 1-D tensor represents the values of the sparse tensor. -// shape: 1-D. tensor represents the shape of the sparse tensor. -// output indices: A list of 1-D tensors represents the indices of the output -// sparse tensors. -// num_split: The number of ways to split. +// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). +// flow_in: A float scalar that enforces proper chaining of operations. 
// -// Returns A list of 1-D tensors represents the values of the output sparse -// tensors.A list of 1-D tensors represents the shape of the output sparse -// tensors. -func SparseSplit(scope *Scope, split_dim tf.Output, indices tf.Output, values tf.Output, shape tf.Output, num_split int64) (output_indices []tf.Output, output_values []tf.Output, output_shape []tf.Output) { +// Returns The current size of the TensorArray. +func TensorArraySizeV3(scope *Scope, handle tf.Output, flow_in tf.Output) (size tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_split": num_split} opspec := tf.OpSpec{ - Type: "SparseSplit", + Type: "TensorArraySizeV3", Input: []tf.Input{ - split_dim, indices, values, shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output_indices, idx, err = makeOutputList(op, idx, "output_indices"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_values, idx, err = makeOutputList(op, idx, "output_values"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - if output_shape, idx, err = makeOutputList(op, idx, "output_shape"); err != nil { - scope.UpdateErr("SparseSplit", err) - return - } - return output_indices, output_values, output_shape -} - -// Computes numerical negative value element-wise. -// -// I.e., \\(y = -x\\). -func Neg(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Neg", - Input: []tf.Input{ - x, + handle, flow_in, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns x + y element-wise. +// Computes gradients for the scaled exponential linear (Selu) operation. // -// *NOTE*: `Add` supports broadcasting. `AddN` does not. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Arguments: +// gradients: The backpropagated gradients to the corresponding Selu operation. +// outputs: The outputs of the corresponding Selu operation. +// +// Returns The gradients: `gradients * (outputs + scale * alpha)` +// if outputs < 0, `scale * gradients` otherwise. +func SeluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Add", + Type: "SeluGrad", Input: []tf.Input{ - x, y, + gradients, outputs, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the derivative of a Gamma random sample w.r.t. `alpha`. -func RandomGammaGrad(scope *Scope, alpha tf.Output, sample tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RandomGammaGrad", - Input: []tf.Input{ - alpha, sample, - }, +// ResourceSparseApplyFtrlV2Attr is an optional argument to ResourceSparseApplyFtrlV2. +type ResourceSparseApplyFtrlV2Attr func(optionalAttr) + +// ResourceSparseApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyFtrlV2UseLocking(value bool) ResourceSparseApplyFtrlV2Attr { + return func(m optionalAttr) { + m["use_locking"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Execute a sub graph on a remote processor. +// Update relevant entries in '*var' according to the Ftrl-proximal scheme. 
// -// The graph specifications(such as graph itself, input tensors and output names) -// are stored as a serialized protocol buffer of RemoteFusedGraphExecuteInfo -// as serialized_remote_fused_graph_execute_info. -// The specifications will be passed to a dedicated registered -// remote fused graph executor. The executor will send the graph specifications -// to a remote processor and execute that graph. The execution results -// will be passed to consumer nodes as outputs of this node. +// That is for rows we have grad for, we update var, accum and linear as follows: +// grad_with_shrinkage = grad + 2 * l2_shrinkage * var +// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage +// linear += grad_with_shrinkage + +// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new // // Arguments: -// inputs: Arbitrary number of tensors with arbitrary data types +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 shrinkage regulariation. Must be a scalar. // -// serialized_remote_fused_graph_execute_info: Serialized protocol buffer -// of RemoteFusedGraphExecuteInfo which contains graph specifications. +// lr_power: Scaling factor. Must be a scalar. // -// Returns Arbitrary number of tensors with arbitrary data types -func RemoteFusedGraphExecute(scope *Scope, inputs []tf.Output, Toutputs []tf.DataType, serialized_remote_fused_graph_execute_info string) (outputs []tf.Output) { +// Returns the created operation. 
+func ResourceSparseApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"Toutputs": Toutputs, "serialized_remote_fused_graph_execute_info": serialized_remote_fused_graph_execute_info} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "RemoteFusedGraphExecute", + Type: "ResourceSparseApplyFtrlV2", Input: []tf.Input{ - tf.OutputList(inputs), + var_, accum, linear, grad, indices, lr, l1, l2, l2_shrinkage, lr_power, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("RemoteFusedGraphExecute", err) - return - } - return outputs + return scope.AddOperation(opspec) } -// MaxPool3DGradGradAttr is an optional argument to MaxPool3DGradGrad. -type MaxPool3DGradGradAttr func(optionalAttr) +// SumAttr is an optional argument to Sum. +type SumAttr func(optionalAttr) -// MaxPool3DGradGradDataFormat sets the optional data_format attribute to value. +// SumKeepDims sets the optional keep_dims attribute to value. // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DGradGradDataFormat(value string) MaxPool3DGradGradAttr { +// value: If true, retain reduced dimensions with length 1. 
+// If not specified, defaults to false +func SumKeepDims(value bool) SumAttr { return func(m optionalAttr) { - m["data_format"] = value + m["keep_dims"] = value } } -// Computes second-order gradients of the maxpooling function. +// Computes the sum of elements across dimensions of a tensor. +// +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. // // Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. // -// Returns Gradients of gradients w.r.t. the input to `max_pool`. -func MaxPool3DGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradGradAttr) (output tf.Output) { +// Returns The reduced tensor. 
+func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPool3DGradGrad", + Type: "Sum", Input: []tf.Input{ - orig_input, orig_output, grad, + input, axis, }, Attrs: attrs, } @@ -15391,101 +14584,165 @@ func MaxPool3DGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output return op.Output(0) } -// Conv3DBackpropFilterV2Attr is an optional argument to Conv3DBackpropFilterV2. -type Conv3DBackpropFilterV2Attr func(optionalAttr) +// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation. +type SparseToSparseSetOperationAttr func(optionalAttr) -// Conv3DBackpropFilterV2DataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func Conv3DBackpropFilterV2DataFormat(value string) Conv3DBackpropFilterV2Attr { +// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr { return func(m optionalAttr) { - m["data_format"] = value + m["validate_indices"] = value } } -// Conv3DBackpropFilterV2Dilations sets the optional dilations attribute to value. +// Applies set operation along last dimension of 2 `SparseTensor` inputs. // -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. 
If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func Conv3DBackpropFilterV2Dilations(value []int64) Conv3DBackpropFilterV2Attr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of 3-D convolution with respect to the filter. +// See SetOperationOp::SetOperationFromContext for values of `set_operation`. +// +// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the +// order and range of `set1` and `set2` indices. +// +// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`, +// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same +// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but +// ignored. +// +// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, +// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same +// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but +// ignored. +// +// If `validate_indices` is `True`, this op validates the order and range of `set1` +// and `set2` indices. +// +// Output `result` is a `SparseTensor` represented by `result_indices`, +// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this +// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` +// dimension contains the result of `set_operation` applied to the corresponding +// `[0...n-1]` dimension of `set`. // // Arguments: -// input: Shape `[batch, depth, rows, cols, in_channels]`. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 5-D -// `[filter_depth, filter_height, filter_width, in_channels, out_channels]` -// tensor. 
-// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3DBackpropFilterV2(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterV2Attr) (output tf.Output) { +// set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major +// order. +// set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major +// order. +// set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must +// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the +// max set size across `0...n-1` dimensions. +// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major +// order. +// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major +// order. +// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must +// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the +// max set size across `0...n-1` dimensions. +// +// +// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is +// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` +// is the max result set size across all `0...n-1` dimensions. 
+func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{"set_operation": set_operation} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Conv3DBackpropFilterV2", + Type: "SparseToSparseSetOperation", Input: []tf.Input{ - input, filter_sizes, out_backprop, + set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. -type FakeQuantWithMinMaxVarsAttr func(optionalAttr) +// Computes softmax cross entropy cost and gradients to backpropagate. +// +// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept +// a matrix of label probabilities, but rather a single label per row +// of features. This label is considered to have probability 1.0 for the +// given row. +// +// Inputs are the logits, not probabilities. +// +// Arguments: +// features: batch_size x num_classes matrix +// labels: batch_size vector with values in [0, num_classes). +// This is the label for the given minibatch entry. +// +// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix). 
+func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSoftmaxCrossEntropyWithLogits", + Input: []tf.Input{ + features, labels, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} -// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { +// StridedSliceGradAttr is an optional argument to StridedSliceGrad. +type StridedSliceGradAttr func(optionalAttr) + +// StridedSliceGradBeginMask sets the optional begin_mask attribute to value. +// If not specified, defaults to 0 +func StridedSliceGradBeginMask(value int64) StridedSliceGradAttr { return func(m optionalAttr) { - m["num_bits"] = value + m["begin_mask"] = value } } -// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { +// StridedSliceGradEndMask sets the optional end_mask attribute to value. +// If not specified, defaults to 0 +func StridedSliceGradEndMask(value int64) StridedSliceGradAttr { return func(m optionalAttr) { - m["narrow_range"] = value + m["end_mask"] = value } } -// Fake-quantize the 'inputs' tensor of type float via global float scalars `min` -// -// and `max` to 'outputs' tensor of same shape as `inputs`. +// StridedSliceGradEllipsisMask sets the optional ellipsis_mask attribute to value. +// If not specified, defaults to 0 +func StridedSliceGradEllipsisMask(value int64) StridedSliceGradAttr { + return func(m optionalAttr) { + m["ellipsis_mask"] = value + } +} + +// StridedSliceGradNewAxisMask sets the optional new_axis_mask attribute to value. 
+// If not specified, defaults to 0 +func StridedSliceGradNewAxisMask(value int64) StridedSliceGradAttr { + return func(m optionalAttr) { + m["new_axis_mask"] = value + } +} + +// StridedSliceGradShrinkAxisMask sets the optional shrink_axis_mask attribute to value. +// If not specified, defaults to 0 +func StridedSliceGradShrinkAxisMask(value int64) StridedSliceGradAttr { + return func(m optionalAttr) { + m["shrink_axis_mask"] = value + } +} + +// Returns the gradient of `StridedSlice`. // -// `[min; max]` define the clamping range for the `inputs` data. -// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` -// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and -// then de-quantized and output as floats in `[min; max]` interval. -// `num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. +// Since `StridedSlice` cuts out pieces of its `input` which is size +// `shape`, its gradient will have the same shape (which is passed here +// as `shape`). The gradient will be zero in any element that the slice +// does not select. // -// This operation has a gradient and thus allows for training `min` and `max` -// values. -func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { +// Arguments are the same as StridedSliceGrad with the exception that +// `dy` is the input gradient to be propagated and `shape` is the +// shape of `StridedSlice`'s `input`. 
+func StridedSliceGrad(scope *Scope, shape tf.Output, begin tf.Output, end tf.Output, strides tf.Output, dy tf.Output, optional ...StridedSliceGradAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -15494,9 +14751,9 @@ func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max a(attrs) } opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVars", + Type: "StridedSliceGrad", Input: []tf.Input{ - inputs, min, max, + shape, begin, end, strides, dy, }, Attrs: attrs, } @@ -15504,198 +14761,136 @@ func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max return op.Output(0) } -// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate. -type ResourceScatterNdUpdateAttr func(optionalAttr) +// LoadTPUEmbeddingRMSPropParametersAttr is an optional argument to LoadTPUEmbeddingRMSPropParameters. +type LoadTPUEmbeddingRMSPropParametersAttr func(optionalAttr) -// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value. +// LoadTPUEmbeddingRMSPropParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: An optional bool. Defaults to True. If True, the assignment will -// be protected by a lock; otherwise the behavior is undefined, -// but may exhibit less contention. -// If not specified, defaults to true -func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingRMSPropParametersTableId(value int64) LoadTPUEmbeddingRMSPropParametersAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["table_id"] = value } } -// Applies sparse `updates` to individual values or slices within a given -// -// variable according to `indices`. -// -// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `ref`. -// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. 
-// -// The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th -// dimension of `ref`. -// -// `updates` is `Tensor` of rank `Q-1+P-K` with shape: -// -// ``` -// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. -// ``` -// -// For example, say we want to update 4 scattered elements to a rank-1 tensor to -// 8 elements. In Python, that update would look like this: -// -// ```python -// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8]) -// indices = tf.constant([[4], [3], [1] ,[7]]) -// updates = tf.constant([9, 10, 11, 12]) -// update = tf.scatter_nd_update(ref, indices, updates) -// with tf.Session() as sess: -// print sess.run(update) -// ``` +// LoadTPUEmbeddingRMSPropParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingRMSPropParametersTableName(value string) LoadTPUEmbeddingRMSPropParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load RMSProp embedding parameters. // -// The resulting update to ref would look like this: +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // -// [1, 11, 3, 10, 9, 6, 7, 12] +// Arguments: +// parameters: Value of parameters used in the RMSProp optimization algorithm. +// ms: Value of ms used in the RMSProp optimization algorithm. +// mom: Value of mom used in the RMSProp optimization algorithm. // -// See `tf.scatter_nd` for more details about how to make updates to -// slices. // -// Arguments: -// ref: A resource handle. Must be from a VarHandleOp. -// indices: A Tensor. Must be one of the following types: int32, int64. -// A tensor of indices into ref. 
-// updates: A Tensor. Must have the same type as ref. A tensor of updated -// values to add to ref. // // Returns the created operation. -func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) { +func LoadTPUEmbeddingRMSPropParameters(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingRMSPropParametersAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceScatterNdUpdate", + Type: "LoadTPUEmbeddingRMSPropParameters", Input: []tf.Input{ - ref, indices, updates, + parameters, ms, mom, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Produces a string handle for the given MultiDeviceIterator. -// -// Arguments: -// multi_device_iterator: A MultiDeviceIterator resource. +// Computes the gradient for the inverse of `x` wrt its input. // -// Returns A string representing the resource. -func MultiDeviceIteratorToStringHandle(scope *Scope, multi_device_iterator tf.Output) (string_handle tf.Output) { +// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` +// is the corresponding input gradient. +func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "MultiDeviceIteratorToStringHandle", + Type: "ReciprocalGrad", Input: []tf.Input{ - multi_device_iterator, + y, dy, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// CudnnRNNV3Attr is an optional argument to CudnnRNNV3. -type CudnnRNNV3Attr func(optionalAttr) - -// CudnnRNNV3RnnMode sets the optional rnn_mode attribute to value. 
-// If not specified, defaults to "lstm" -func CudnnRNNV3RnnMode(value string) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } -} - -// CudnnRNNV3InputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNV3InputMode(value string) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["input_mode"] = value - } -} - -// CudnnRNNV3Direction sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNV3Direction(value string) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["direction"] = value +// Returns the element-wise min of two SparseTensors. +// +// Assumes the two SparseTensors have the same shape, i.e., no broadcasting. +// +// Arguments: +// a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, in the canonical lexicographic ordering. +// a_values: 1-D. `N` non-empty values corresponding to `a_indices`. +// a_shape: 1-D. Shape of the input SparseTensor. +// b_indices: counterpart to `a_indices` for the other operand. +// b_values: counterpart to `a_values` for the other operand; must be of the same dtype. +// b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. +// +// Returns 2-D. The indices of the output SparseTensor.1-D. The values of the output SparseTensor. +func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { + if scope.Err() != nil { + return } -} - -// CudnnRNNV3Dropout sets the optional dropout attribute to value. 
-// If not specified, defaults to 0 -func CudnnRNNV3Dropout(value float32) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["dropout"] = value + opspec := tf.OpSpec{ + Type: "SparseSparseMinimum", + Input: []tf.Input{ + a_indices, a_values, a_shape, b_indices, b_values, b_shape, + }, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// CudnnRNNV3Seed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNV3Seed(value int64) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} +// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA. +type ResourceSparseApplyAdagradDAAttr func(optionalAttr) -// CudnnRNNV3Seed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNV3Seed2(value int64) CudnnRNNV3Attr { +// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value. +// +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr { return func(m optionalAttr) { - m["seed2"] = value + m["use_locking"] = value } } -// CudnnRNNV3IsTraining sets the optional is_training attribute to value. -// If not specified, defaults to true -func CudnnRNNV3IsTraining(value bool) CudnnRNNV3Attr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// A RNN backed by cuDNN. +// Update entries in '*var' and '*accum' according to the proximal adagrad scheme. // -// Computes the RNN from the input and initial states, with respect to the params -// buffer. Accepts one extra input "sequence_lengths" than CudnnRNN. +// Arguments: +// var_: Should be from a Variable(). +// gradient_accumulator: Should be from a Variable(). 
+// gradient_squared_accumulator: Should be from a Variable(). +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// lr: Learning rate. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// global_step: Training step number. Must be a scalar. // -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicates whether there is a linear projection between the input and -// the actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. Should be -// "unidirectional" or "bidirectional". -// dropout: Dropout probability. When set to 0., dropout is disabled. -// seed: The 1st part of a seed to initialize dropout. -// seed2: The 2nd part of a seed to initialize dropout. -// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size]. -// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size, -// num_units]. -// input_c: For LSTM, a 3-D tensor with the shape of -// [num_layer * dir, batch, num_units]. For other models, it is ignored. -// params: A 1-D tensor that contains the weights and biases in an opaque layout. -// The size must be created through CudnnRNNParamsSize, and initialized -// separately. Note that they might not be compatible across different -// generations. So it is a good idea to save and restore -// sequence_lengths: a vector of lengths of each input sequence. -// output: A 3-D tensor with the shape of [seq_length, batch_size, -// dir * num_units]. -// output_h: The same shape has input_h. -// output_c: The same shape as input_c for LSTM. An empty tensor for other models. -// is_training: Indicates whether this operation is used for inferenece or -// training. 
-// reserve_space: An opaque tensor that can be used in backprop calculation. It -// is only produced if is_training is true. -func CudnnRNNV3(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, sequence_lengths tf.Output, optional ...CudnnRNNV3Attr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output, host_reserved tf.Output) { +// Returns the created operation. +func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -15704,160 +14899,193 @@ func CudnnRNNV3(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Out a(attrs) } opspec := tf.OpSpec{ - Type: "CudnnRNNV3", + Type: "ResourceSparseApplyAdagradDA", Input: []tf.Input{ - input, input_h, input_c, params, sequence_lengths, + var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) + return scope.AddOperation(opspec) } -// Applies softmax to a batched N-D `SparseTensor`. -// -// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` -// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. -// -// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost -// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly -// zero elements do not participate*. Specifically, the algorithm is equivalent -// to the following: +// EncodeJpegAttr is an optional argument to EncodeJpeg. +type EncodeJpegAttr func(optionalAttr) + +// EncodeJpegFormat sets the optional format attribute to value. 
// -// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix -// with shape `[B, C]`, along the size-C dimension; -// (2) Masks out the original implicitly-zero locations; -// (3) Renormalizes the remaining elements. +// value: Per pixel image format. +// If not specified, defaults to "" +func EncodeJpegFormat(value string) EncodeJpegAttr { + return func(m optionalAttr) { + m["format"] = value + } +} + +// EncodeJpegQuality sets the optional quality attribute to value. // -// Hence, the `SparseTensor` result has exactly the same non-zero indices and -// shape. +// value: Quality of the compression from 0 to 100 (higher is better and slower). +// If not specified, defaults to 95 +func EncodeJpegQuality(value int64) EncodeJpegAttr { + return func(m optionalAttr) { + m["quality"] = value + } +} + +// EncodeJpegProgressive sets the optional progressive attribute to value. // -// Arguments: -// sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a -// SparseTensor, in canonical ordering. -// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. +// value: If True, create a JPEG that loads progressively (coarse to fine). +// If not specified, defaults to false +func EncodeJpegProgressive(value bool) EncodeJpegAttr { + return func(m optionalAttr) { + m["progressive"] = value + } +} + +// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value. // -// Returns 1-D. The `NNZ` values for the result `SparseTensor`. -func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return +// value: If True, spend CPU/RAM to reduce size with no quality change. 
+// If not specified, defaults to false +func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr { + return func(m optionalAttr) { + m["optimize_size"] = value } - opspec := tf.OpSpec{ - Type: "SparseSoftmax", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, - }, +} + +// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value. +// +// value: See http://en.wikipedia.org/wiki/Chroma_subsampling. +// If not specified, defaults to true +func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr { + return func(m optionalAttr) { + m["chroma_downsampling"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Partitions `data` into `num_partitions` tensors using indices from `partitions`. +// EncodeJpegDensityUnit sets the optional density_unit attribute to value. // -// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` -// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` -// are placed in `outputs[i]` in lexicographic order of `js`, and the first -// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. -// In detail, +// value: Unit used to specify `x_density` and `y_density`: +// pixels per inch (`'in'`) or centimeter (`'cm'`). +// If not specified, defaults to "in" +func EncodeJpegDensityUnit(value string) EncodeJpegAttr { + return func(m optionalAttr) { + m["density_unit"] = value + } +} + +// EncodeJpegXDensity sets the optional x_density attribute to value. // -// ```python -// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] +// value: Horizontal pixels per density unit. +// If not specified, defaults to 300 +func EncodeJpegXDensity(value int64) EncodeJpegAttr { + return func(m optionalAttr) { + m["x_density"] = value + } +} + +// EncodeJpegYDensity sets the optional y_density attribute to value. // -// outputs[i] = pack([data[js, ...] 
for js if partitions[js] == i]) -// ``` +// value: Vertical pixels per density unit. +// If not specified, defaults to 300 +func EncodeJpegYDensity(value int64) EncodeJpegAttr { + return func(m optionalAttr) { + m["y_density"] = value + } +} + +// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value. // -// `data.shape` must start with `partitions.shape`. +// value: If not empty, embed this XMP metadata in the image header. +// If not specified, defaults to "" +func EncodeJpegXmpMetadata(value string) EncodeJpegAttr { + return func(m optionalAttr) { + m["xmp_metadata"] = value + } +} + +// JPEG-encode an image. // -// For example: +// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. // -// ```python -// # Scalar partitions. -// partitions = 1 -// num_partitions = 2 -// data = [10, 20] -// outputs[0] = [] # Empty with shape [0, 2] -// outputs[1] = [[10, 20]] +// The attr `format` can be used to override the color format of the encoded +// output. Values can be: // -// # Vector partitions. -// partitions = [0, 0, 1, 1, 0] -// num_partitions = 2 -// data = [10, 20, 30, 40, 50] -// outputs[0] = [10, 20, 50] -// outputs[1] = [30, 40] -// ``` +// * `''`: Use a default format based on the number of channels in the image. +// * `grayscale`: Output a grayscale JPEG image. The `channels` dimension +// of `image` must be 1. +// * `rgb`: Output an RGB JPEG image. The `channels` dimension +// of `image` must be 3. // -// See `dynamic_stitch` for an example on how to merge partitions back. +// If `format` is not specified or is the empty string, a default format is picked +// in function of the number of channels in `image`: // -//
-// -//
+// * 1: Output a grayscale image. +// * 3: Output an RGB image. // // Arguments: +// image: 3-D with shape `[height, width, channels]`. // -// partitions: Any shape. Indices in the range `[0, num_partitions)`. -// num_partitions: The number of partitions to output. -func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { +// Returns 0-D. JPEG-encoded image. +func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_partitions": num_partitions} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "DynamicPartition", + Type: "EncodeJpeg", Input: []tf.Input{ - data, partitions, + image, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("DynamicPartition", err) - return - } - return outputs + return op.Output(0) } -// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. -type ResourceApplyAdagradAttr func(optionalAttr) +// MultinomialAttr is an optional argument to Multinomial. +type MultinomialAttr func(optionalAttr) -// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. +// MultinomialSeed sets the optional seed attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { +// value: If either seed or seed2 is set to be non-zero, the internal random number +// generator is seeded by the given seed. Otherwise, a random seed is used. 
+// If not specified, defaults to 0 +func MultinomialSeed(value int64) MultinomialAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["seed"] = value } } -// ResourceApplyAdagradUpdateSlots sets the optional update_slots attribute to value. -// If not specified, defaults to true -func ResourceApplyAdagradUpdateSlots(value bool) ResourceApplyAdagradAttr { +// MultinomialSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func MultinomialSeed2(value int64) MultinomialAttr { return func(m optionalAttr) { - m["update_slots"] = value + m["seed2"] = value } } -// Update '*var' according to the adagrad scheme. -// -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) +// MultinomialOutputDtype sets the optional output_dtype attribute to value. +// If not specified, defaults to DT_INT64 +func MultinomialOutputDtype(value tf.DataType) MultinomialAttr { + return func(m optionalAttr) { + m["output_dtype"] = value + } +} + +// Draws samples from a multinomial distribution. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. +// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` +// represents the unnormalized log probabilities for all classes. +// num_samples: 0-D. Number of independent samples to draw for each row slice. // -// Returns the created operation. -func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { +// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` +// contains the drawn class labels with range `[0, num_classes)`. 
+func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -15866,68 +15094,82 @@ func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.O a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAdagrad", + Type: "Multinomial", Input: []tf.Input{ - var_, accum, lr, grad, + logits, num_samples, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Retrieves the tree ensemble resource stamp token, number of trees and growing statistics. +// RetrieveTPUEmbeddingRMSPropParametersAttr is an optional argument to RetrieveTPUEmbeddingRMSPropParameters. +type RetrieveTPUEmbeddingRMSPropParametersAttr func(optionalAttr) + +// RetrieveTPUEmbeddingRMSPropParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// Arguments: -// tree_ensemble_handle: Handle to the tree ensemble. +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingRMSPropParametersTableId(value int64) RetrieveTPUEmbeddingRMSPropParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// RetrieveTPUEmbeddingRMSPropParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingRMSPropParametersTableName(value string) RetrieveTPUEmbeddingRMSPropParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Retrieve RMSProp embedding parameters. // -// Returns Stamp token of the tree ensemble resource.The number of trees in the tree ensemble resource.The number of trees that were finished successfully.The number of layers we attempted to build (but not necessarily succeeded).Rank size 2 tensor that contains start and end ids of the nodes in the latest -// layer. 
-func BoostedTreesGetEnsembleStates(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, num_trees tf.Output, num_finalized_trees tf.Output, num_attempted_layers tf.Output, last_layer_nodes_range tf.Output) { +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns Parameter parameters updated by the RMSProp optimization algorithm.Parameter ms updated by the RMSProp optimization algorithm.Parameter mom updated by the RMSProp optimization algorithm. +func RetrieveTPUEmbeddingRMSPropParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingRMSPropParametersAttr) (parameters tf.Output, ms tf.Output, mom tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BoostedTreesGetEnsembleStates", - Input: []tf.Input{ - tree_ensemble_handle, - }, + Type: "RetrieveTPUEmbeddingRMSPropParameters", + + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) + return op.Output(0), op.Output(1), op.Output(2) } -// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign. -type ResourceApplyPowerSignAttr func(optionalAttr) +// QuantizedRelu6Attr is an optional argument to QuantizedRelu6. +type QuantizedRelu6Attr func(optionalAttr) -// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and m tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. 
-// If not specified, defaults to false -func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr { +// QuantizedRelu6OutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QUINT8 +func QuantizedRelu6OutType(value tf.DataType) QuantizedRelu6Attr { return func(m optionalAttr) { - m["use_locking"] = value + m["out_type"] = value } } -// Update '*var' according to the AddSign update. -// -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g -// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g -// variable <- variable - lr_t * update +// Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)` // // Arguments: -// var_: Should be from a Variable(). -// m: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// logbase: Must be a scalar. -// sign_decay: Must be a scalar. -// beta: Must be a scalar. -// grad: The gradient. // -// Returns the created operation. -func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) { +// min_features: The float value that the lowest quantized value represents. +// max_features: The float value that the highest quantized value represents. +// +// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. 
+func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedRelu6Attr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { if scope.Err() != nil { return } @@ -15936,57 +15178,66 @@ func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Out a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyPowerSign", + Type: "QuantizedRelu6", Input: []tf.Input{ - var_, m, lr, logbase, sign_decay, beta, grad, + features, min_features, max_features, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// StringFormatAttr is an optional argument to StringFormat. -type StringFormatAttr func(optionalAttr) +// BatchMatMulAttr is an optional argument to BatchMatMul. +type BatchMatMulAttr func(optionalAttr) -// StringFormatTemplate sets the optional template attribute to value. +// BatchMatMulAdjX sets the optional adj_x attribute to value. // -// value: A string, the template to format tensor summaries into. -// If not specified, defaults to "%s" -func StringFormatTemplate(value string) StringFormatAttr { +// value: If `True`, adjoint the slices of `x`. Defaults to `False`. +// If not specified, defaults to false +func BatchMatMulAdjX(value bool) BatchMatMulAttr { return func(m optionalAttr) { - m["template"] = value + m["adj_x"] = value } } -// StringFormatPlaceholder sets the optional placeholder attribute to value. +// BatchMatMulAdjY sets the optional adj_y attribute to value. // -// value: A string, at each placeholder in the template a subsequent tensor summary will be inserted. -// If not specified, defaults to "%s" -func StringFormatPlaceholder(value string) StringFormatAttr { +// value: If `True`, adjoint the slices of `y`. Defaults to `False`. 
+// If not specified, defaults to false +func BatchMatMulAdjY(value bool) BatchMatMulAttr { return func(m optionalAttr) { - m["placeholder"] = value + m["adj_y"] = value } } -// StringFormatSummarize sets the optional summarize attribute to value. +// Multiplies slices of two tensors in batches. // -// value: When formatting the tensor summaries print the first and last summarize entries of each tensor dimension. -// If not specified, defaults to 3 -func StringFormatSummarize(value int64) StringFormatAttr { - return func(m optionalAttr) { - m["summarize"] = value - } -} - -// Formats a string template using a list of tensors. +// Multiplies all slices of `Tensor` `x` and `y` (each slice can be +// viewed as an element of a batch), and arranges the individual results +// in a single output tensor of the same batch size. Each of the +// individual slices can optionally be adjointed (to adjoint a matrix +// means to transpose and conjugate it) before multiplication by setting +// the `adj_x` or `adj_y` flag to `True`, which are by default `False`. // -// Formats a string template using a list of tensors, pretty-printing tensor summaries. +// The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]` +// and `[..., r_y, c_y]`. +// +// The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where: +// +// r_o = c_x if adj_x else r_x +// c_o = r_y if adj_y else c_y +// +// It is computed as: +// +// output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :]) // // Arguments: -// inputs: The list of tensors to format into the placeholder string. +// x: 2-D or higher with shape `[..., r_x, c_x]`. +// y: 2-D or higher with shape `[..., r_y, c_y]`. // -// Returns = The resulting string scalar. 
-func StringFormat(scope *Scope, inputs []tf.Output, optional ...StringFormatAttr) (output tf.Output) { +// Returns 3-D or higher with shape `[..., r_o, c_o]` +func BatchMatMul(scope *Scope, x tf.Output, y tf.Output, optional ...BatchMatMulAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -15995,9 +15246,9 @@ func StringFormat(scope *Scope, inputs []tf.Output, optional ...StringFormatAttr a(attrs) } opspec := tf.OpSpec{ - Type: "StringFormat", + Type: "BatchMatMul", Input: []tf.Input{ - tf.OutputList(inputs), + x, y, }, Attrs: attrs, } @@ -16005,479 +15256,475 @@ func StringFormat(scope *Scope, inputs []tf.Output, optional ...StringFormatAttr return op.Output(0) } -// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp. -type ResourceSparseApplyRMSPropAttr func(optionalAttr) +// ParseSequenceExampleAttr is an optional argument to ParseSequenceExample. +type ParseSequenceExampleAttr func(optionalAttr) -// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value. +// ParseSequenceExampleNcontextSparse sets the optional Ncontext_sparse attribute to value. +// If not specified, defaults to 0 // -// value: If `True`, updating of the var, ms, and mom tensors is protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr { +// REQUIRES: value >= 0 +func ParseSequenceExampleNcontextSparse(value int64) ParseSequenceExampleAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["Ncontext_sparse"] = value } } -// Update '*var' according to the RMSProp algorithm. -// -// Note that in dense implementation of this algorithm, ms and mom will -// update even if the grad is zero, but in this sparse implementation, ms -// and mom will not update in iterations during which the grad is zero. 
-// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) -// -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom -// -// Arguments: -// var_: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var, ms and mom. +// ParseSequenceExampleNcontextDense sets the optional Ncontext_dense attribute to value. +// If not specified, defaults to 0 // -// Returns the created operation. -func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceSparseApplyRMSProp", - Input: []tf.Input{ - var_, ms, mom, lr, rho, momentum, epsilon, grad, indices, - }, - Attrs: attrs, +// REQUIRES: value >= 0 +func ParseSequenceExampleNcontextDense(value int64) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["Ncontext_dense"] = value } - return scope.AddOperation(opspec) } -// Creates a TensorList by indexing into a Tensor. +// ParseSequenceExampleNfeatureListSparse sets the optional Nfeature_list_sparse attribute to value. +// If not specified, defaults to 0 // -// Each member of the TensorList corresponds to one row of the input tensor, -// specified by the given index (see `tf.gather`). 
+// REQUIRES: value >= 0 +func ParseSequenceExampleNfeatureListSparse(value int64) ParseSequenceExampleAttr { + return func(m optionalAttr) { + m["Nfeature_list_sparse"] = value + } +} + +// ParseSequenceExampleNfeatureListDense sets the optional Nfeature_list_dense attribute to value. +// If not specified, defaults to 0 // -// tensor: The input tensor. -// indices: The indices used to index into the list. -// element_shape: The shape of the elements in the list (can be less specified than -// the shape of the tensor). -// num_elements: The size of the output list. Must be large enough to accommodate -// the largest index in indices. If -1, the list is just large enough to include -// the largest index in indices. -// output_handle: The TensorList. -func TensorListScatterV2(scope *Scope, tensor tf.Output, indices tf.Output, element_shape tf.Output, num_elements tf.Output) (output_handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorListScatterV2", - Input: []tf.Input{ - tensor, indices, element_shape, num_elements, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox. -type SampleDistortedBoundingBoxAttr func(optionalAttr) - -// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to non-zero, the random number -// generator is seeded by the given `seed`. Otherwise, it is seeded by a random -// seed. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr { +// REQUIRES: value >= 0 +func ParseSequenceExampleNfeatureListDense(value int64) ParseSequenceExampleAttr { return func(m optionalAttr) { - m["seed"] = value + m["Nfeature_list_dense"] = value } } -// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value. 
+// ParseSequenceExampleContextSparseTypes sets the optional context_sparse_types attribute to value. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value. +// value: A list of Ncontext_sparse types; the data types of data in +// each context Feature given in context_sparse_keys. +// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// If not specified, defaults to <> // -// value: The cropped area of the image must contain at least this -// fraction of any bounding box supplied. The value of this parameter should be -// non-negative. In the case of 0, the cropped area does not need to overlap -// any of the bounding boxes supplied. -// If not specified, defaults to 0.1 -func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr { +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleContextSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { return func(m optionalAttr) { - m["min_object_covered"] = value + m["context_sparse_types"] = value } } -// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value. +// ParseSequenceExampleFeatureListDenseTypes sets the optional feature_list_dense_types attribute to value. +// If not specified, defaults to <> // -// value: The cropped area of the image must have an aspect ratio = -// width / height within this range. 
-// If not specified, defaults to -func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr { +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleFeatureListDenseTypes(value []tf.DataType) ParseSequenceExampleAttr { return func(m optionalAttr) { - m["aspect_ratio_range"] = value + m["feature_list_dense_types"] = value } } -// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. +// ParseSequenceExampleContextDenseShapes sets the optional context_dense_shapes attribute to value. // -// value: The cropped area of the image must contain a fraction of the -// supplied image within this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { +// value: A list of Ncontext_dense shapes; the shapes of data in +// each context Feature given in context_dense_keys. +// The number of elements in the Feature corresponding to context_dense_key[j] +// must always equal context_dense_shapes[j].NumEntries(). +// The shape of context_dense_values[j] will match context_dense_shapes[j]. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleContextDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { return func(m optionalAttr) { - m["area_range"] = value + m["context_dense_shapes"] = value } } -// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value. +// ParseSequenceExampleFeatureListSparseTypes sets the optional feature_list_sparse_types attribute to value. // -// value: Number of attempts at generating a cropped region of the image -// of the specified constraints. After `max_attempts` failures, return the entire -// image. 
-// If not specified, defaults to 100 -func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr { +// value: A list of Nfeature_list_sparse types; the data types +// of data in each FeatureList given in feature_list_sparse_keys. +// Currently the ParseSingleSequenceExample supports DT_FLOAT (FloatList), +// DT_INT64 (Int64List), and DT_STRING (BytesList). +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleFeatureListSparseTypes(value []tf.DataType) ParseSequenceExampleAttr { return func(m optionalAttr) { - m["max_attempts"] = value + m["feature_list_sparse_types"] = value } } -// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. +// ParseSequenceExampleFeatureListDenseShapes sets the optional feature_list_dense_shapes attribute to value. // -// value: Controls behavior if no bounding boxes supplied. -// If true, assume an implicit bounding box covering the whole input. If false, -// raise an error. -// If not specified, defaults to false -func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr { +// value: A list of Nfeature_list_dense shapes; the shapes of +// data in each FeatureList given in feature_list_dense_keys. +// The shape of each Feature in the FeatureList corresponding to +// feature_list_dense_key[j] must always equal +// feature_list_dense_shapes[j].NumEntries(). +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func ParseSequenceExampleFeatureListDenseShapes(value []tf.Shape) ParseSequenceExampleAttr { return func(m optionalAttr) { - m["use_image_if_no_bounding_boxes"] = value + m["feature_list_dense_shapes"] = value } } -// Generate a single randomly distorted bounding box for an image. -// -// Bounding box annotations are often supplied in addition to ground-truth labels -// in image recognition or object localization tasks. 
A common technique for -// training such a system is to randomly distort an image while preserving -// its content, i.e. *data augmentation*. This Op outputs a randomly distorted -// localization of an object, i.e. bounding box, given an `image_size`, -// `bounding_boxes` and a series of constraints. -// -// The output of this Op is a single bounding box that may be used to crop the -// original image. The output is returned as 3 tensors: `begin`, `size` and -// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the -// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize -// what the bounding box looks like. -// -// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. -// -// For example, -// -// ```python -// # Generate a single distorted bounding box. -// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( -// tf.shape(image), -// bounding_boxes=bounding_boxes) -// -// # Draw the bounding box in an image summary. -// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), -// bbox_for_draw) -// tf.summary.image('images_with_box', image_with_box) -// -// # Employ the bounding box to distort the image. -// distorted_image = tf.slice(image, begin, size) -// ``` -// -// Note that if no bounding box information is available, setting -// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit -// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is -// false and no bounding boxes are supplied, an error is raised. +// Transforms a vector of brain.SequenceExample protos (as strings) into typed tensors. // // Arguments: -// image_size: 1-D, containing `[height, width, channels]`. -// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes -// associated with the image. 
-// -// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to -// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to -// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box. -// Provide as input to `tf.image.draw_bounding_boxes`. -func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) { +// serialized: A vector containing binary serialized SequenceExample protos. +// debug_name: A vector containing the names of the serialized protos. +// May contain, for example, table key (descriptive) name for the +// corresponding serialized proto. This is purely useful for debugging +// purposes, and the presence of values here has no effect on the output. +// May also be an empty vector if no name is available. +// context_dense_defaults: A list of Ncontext_dense Tensors (some may be empty). +// context_dense_defaults[j] provides default values +// when the SequenceExample's context map lacks context_dense_key[j]. +// If an empty Tensor is provided for context_dense_defaults[j], +// then the Feature context_dense_keys[j] is required. +// The input type is inferred from context_dense_defaults[j], even when it's +// empty. If context_dense_defaults[j] is not empty, its shape must match +// context_dense_shapes[j]. +// feature_list_dense_missing_assumed_empty: A vector listing the +// FeatureList keys which may be missing from the SequenceExamples. If the +// associated FeatureList is missing, it is treated as empty. By default, +// any FeatureList not listed in this vector must exist in the SequenceExamples. +// context_sparse_keys: A list of Ncontext_sparse string Tensors (scalars). +// The keys expected in the Examples' features associated with context_sparse +// values. +// context_dense_keys: A list of Ncontext_dense string Tensors (scalars). 
+// The keys expected in the SequenceExamples' context features associated with +// dense values. +// feature_list_sparse_keys: A list of Nfeature_list_sparse string Tensors +// (scalars). The keys expected in the FeatureLists associated with sparse +// values. +// feature_list_dense_keys: A list of Nfeature_list_dense string Tensors (scalars). +// The keys expected in the SequenceExamples' feature_lists associated +// with lists of dense values. +func ParseSequenceExample(scope *Scope, serialized tf.Output, debug_name tf.Output, context_dense_defaults []tf.Output, feature_list_dense_missing_assumed_empty []string, context_sparse_keys []string, context_dense_keys []string, feature_list_sparse_keys []string, feature_list_dense_keys []string, optional ...ParseSequenceExampleAttr) (context_sparse_indices []tf.Output, context_sparse_values []tf.Output, context_sparse_shapes []tf.Output, context_dense_values []tf.Output, feature_list_sparse_indices []tf.Output, feature_list_sparse_values []tf.Output, feature_list_sparse_shapes []tf.Output, feature_list_dense_values []tf.Output, feature_list_dense_lengths []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"feature_list_dense_missing_assumed_empty": feature_list_dense_missing_assumed_empty, "context_sparse_keys": context_sparse_keys, "context_dense_keys": context_dense_keys, "feature_list_sparse_keys": feature_list_sparse_keys, "feature_list_dense_keys": feature_list_dense_keys} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SampleDistortedBoundingBox", + Type: "ParseSequenceExample", Input: []tf.Input{ - image_size, bounding_boxes, + serialized, debug_name, tf.OutputList(context_dense_defaults), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// LRNAttr is an optional argument to LRN. 
-type LRNAttr func(optionalAttr) - -// LRNDepthRadius sets the optional depth_radius attribute to value. -// -// value: 0-D. Half-width of the 1-D normalization window. -// If not specified, defaults to 5 -func LRNDepthRadius(value int64) LRNAttr { - return func(m optionalAttr) { - m["depth_radius"] = value + if scope.Err() != nil { + return } -} - -// LRNBias sets the optional bias attribute to value. -// -// value: An offset (usually positive to avoid dividing by 0). -// If not specified, defaults to 1 -func LRNBias(value float32) LRNAttr { - return func(m optionalAttr) { - m["bias"] = value + var idx int + var err error + if context_sparse_indices, idx, err = makeOutputList(op, idx, "context_sparse_indices"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if context_sparse_values, idx, err = makeOutputList(op, idx, "context_sparse_values"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if context_sparse_shapes, idx, err = makeOutputList(op, idx, "context_sparse_shapes"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if context_dense_values, idx, err = makeOutputList(op, idx, "context_dense_values"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if feature_list_sparse_indices, idx, err = makeOutputList(op, idx, "feature_list_sparse_indices"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return } + if feature_list_sparse_values, idx, err = makeOutputList(op, idx, "feature_list_sparse_values"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if feature_list_sparse_shapes, idx, err = makeOutputList(op, idx, "feature_list_sparse_shapes"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if feature_list_dense_values, idx, err = makeOutputList(op, idx, "feature_list_dense_values"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + if feature_list_dense_lengths, 
idx, err = makeOutputList(op, idx, "feature_list_dense_lengths"); err != nil { + scope.UpdateErr("ParseSequenceExample", err) + return + } + return context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, feature_list_sparse_indices, feature_list_sparse_values, feature_list_sparse_shapes, feature_list_dense_values, feature_list_dense_lengths } -// LRNAlpha sets the optional alpha attribute to value. +// LoadTPUEmbeddingADAMParametersAttr is an optional argument to LoadTPUEmbeddingADAMParameters. +type LoadTPUEmbeddingADAMParametersAttr func(optionalAttr) + +// LoadTPUEmbeddingADAMParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: A scale factor, usually positive. -// If not specified, defaults to 1 -func LRNAlpha(value float32) LRNAttr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingADAMParametersTableId(value int64) LoadTPUEmbeddingADAMParametersAttr { return func(m optionalAttr) { - m["alpha"] = value + m["table_id"] = value } } -// LRNBeta sets the optional beta attribute to value. -// -// value: An exponent. -// If not specified, defaults to 0.5 -func LRNBeta(value float32) LRNAttr { +// LoadTPUEmbeddingADAMParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingADAMParametersTableName(value string) LoadTPUEmbeddingADAMParametersAttr { return func(m optionalAttr) { - m["beta"] = value + m["table_name"] = value } } -// Local Response Normalization. +// Load ADAM embedding parameters. // -// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last -// dimension), and each vector is normalized independently. Within a given vector, -// each component is divided by the weighted, squared sum of inputs within -// `depth_radius`. In detail, +// An op that loads optimization parameters into HBM for embedding. 
Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // -// sqr_sum[a, b, c, d] = -// sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) -// output = input / (bias + alpha * sqr_sum) ** beta +// Arguments: +// parameters: Value of parameters used in the ADAM optimization algorithm. +// momenta: Value of momenta used in the ADAM optimization algorithm. +// velocities: Value of velocities used in the ADAM optimization algorithm. // -// For details, see [Krizhevsky et al., ImageNet classification with deep -// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). // -// Arguments: -// input: 4-D. -func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) { +// +// Returns the created operation. +func LoadTPUEmbeddingADAMParameters(scope *Scope, parameters tf.Output, momenta tf.Output, velocities tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingADAMParametersAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "LRN", + Type: "LoadTPUEmbeddingADAMParameters", Input: []tf.Input{ - input, + parameters, momenta, velocities, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Creates a dataset that zips together `input_datasets`. -func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Inverse 2D real-valued fast Fourier transform. 
+// +// Computes the inverse 2-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most 2 dimensions of `input`. +// +// The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`: +// The inner-most dimension contains the `fft_length / 2 + 1` unique components of +// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed +// from the size of the inner-most 2 dimensions of `input`. If the FFT length used +// to compute `input` is odd, it should be provided since it cannot be inferred +// properly. +// +// Along each axis `IRFFT2D` is computed on, if `fft_length` (or +// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. +// +// Arguments: +// input: A complex64 tensor. +// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. +// +// Returns A float32 tensor of the same rank as `input`. The inner-most 2 +// dimensions of `input` are replaced with the `fft_length` samples of their +// inverse 2D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.irfft2 +// @end_compatibility +func IRFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ZipDataset", + Type: "IRFFT2D", Input: []tf.Input{ - tf.OutputList(input_datasets), + input, fft_length, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. -type ResourceSparseApplyAdagradAttr func(optionalAttr) +// InfeedEnqueueTupleAttr is an optional argument to InfeedEnqueueTuple. 
+type InfeedEnqueueTupleAttr func(optionalAttr) -// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value. +// InfeedEnqueueTupleLayouts sets the optional layouts attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr { +// value: A vector holding the requested layout in minor-to-major sequence for +// all the tuple shapes, in the order the shapes appear in the "shapes" input. +// The layout elements for a sub-shape can be set to -1, in which case the +// corresponding layout will be computed by the infeed operation. +// If not specified, defaults to <> +func InfeedEnqueueTupleLayouts(value []int64) InfeedEnqueueTupleAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["layouts"] = value } } -// ResourceSparseApplyAdagradUpdateSlots sets the optional update_slots attribute to value. -// If not specified, defaults to true -func ResourceSparseApplyAdagradUpdateSlots(value bool) ResourceSparseApplyAdagradAttr { +// InfeedEnqueueTupleDeviceOrdinal sets the optional device_ordinal attribute to value. +// +// value: The TPU device to use. This should be -1 when the Op +// is running on a TPU device, and >= 0 when the Op is running on the CPU +// device. +// If not specified, defaults to -1 +func InfeedEnqueueTupleDeviceOrdinal(value int64) InfeedEnqueueTupleAttr { return func(m optionalAttr) { - m["update_slots"] = value + m["device_ordinal"] = value } } -// Update relevant entries in '*var' and '*accum' according to the adagrad scheme. -// -// That is for rows we have grad for, we update var and accum as follows: -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) +// Feeds multiple Tensor values into the computation as an XLA tuple. 
// // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. +// inputs: A list of tensors that will be provided using the infeed mechanism. +// shapes: The shapes of each tensor in `inputs`. // // Returns the created operation. -func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) { +func InfeedEnqueueTuple(scope *Scope, inputs []tf.Output, shapes []tf.Shape, optional ...InfeedEnqueueTupleAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"shapes": shapes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdagrad", + Type: "InfeedEnqueueTuple", Input: []tf.Input{ - var_, accum, lr, grad, indices, + tf.OutputList(inputs), }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Elementwise computes the bitwise right-shift of `x` and `y`. -// -// Performs a logical shift for unsigned integer types, and an arithmetic shift -// for signed integer types. +// Returns which elements of x are finite. // -// If `y` is negative, or greater than or equal to than the width of `x` in bits -// the result is implementation defined. -func RightShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// @compatibility(numpy) +// Equivalent to np.isfinite +// @end_compatibility +func IsFinite(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "RightShift", + Type: "IsFinite", Input: []tf.Input{ - x, y, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// TensorListStackAttr is an optional argument to TensorListStack. 
-type TensorListStackAttr func(optionalAttr) +// ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign. +type ResourceStridedSliceAssignAttr func(optionalAttr) -// TensorListStackNumElements sets the optional num_elements attribute to value. -// If not specified, defaults to -1 -func TensorListStackNumElements(value int64) TensorListStackAttr { +// ResourceStridedSliceAssignBeginMask sets the optional begin_mask attribute to value. +// If not specified, defaults to 0 +func ResourceStridedSliceAssignBeginMask(value int64) ResourceStridedSliceAssignAttr { return func(m optionalAttr) { - m["num_elements"] = value + m["begin_mask"] = value } } -// Stacks all tensors in the list. +// ResourceStridedSliceAssignEndMask sets the optional end_mask attribute to value. +// If not specified, defaults to 0 +func ResourceStridedSliceAssignEndMask(value int64) ResourceStridedSliceAssignAttr { + return func(m optionalAttr) { + m["end_mask"] = value + } +} + +// ResourceStridedSliceAssignEllipsisMask sets the optional ellipsis_mask attribute to value. +// If not specified, defaults to 0 +func ResourceStridedSliceAssignEllipsisMask(value int64) ResourceStridedSliceAssignAttr { + return func(m optionalAttr) { + m["ellipsis_mask"] = value + } +} + +// ResourceStridedSliceAssignNewAxisMask sets the optional new_axis_mask attribute to value. +// If not specified, defaults to 0 +func ResourceStridedSliceAssignNewAxisMask(value int64) ResourceStridedSliceAssignAttr { + return func(m optionalAttr) { + m["new_axis_mask"] = value + } +} + +// ResourceStridedSliceAssignShrinkAxisMask sets the optional shrink_axis_mask attribute to value. +// If not specified, defaults to 0 +func ResourceStridedSliceAssignShrinkAxisMask(value int64) ResourceStridedSliceAssignAttr { + return func(m optionalAttr) { + m["shrink_axis_mask"] = value + } +} + +// Assign `value` to the sliced l-value reference of `ref`. // -// Requires that all tensors have the same shape. 
+// The values of `value` are assigned to the positions in the variable +// `ref` that are selected by the slice parameters. The slice parameters +// `begin, `end`, `strides`, etc. work exactly as in `StridedSlice`. // -// input_handle: the input list -// tensor: the gathered result -// num_elements: optional. If not -1, the number of elements in the list. +// NOTE this op currently does not support broadcasting and so `value`'s +// shape must be exactly the shape produced by the slice of `ref`. // -func TensorListStack(scope *Scope, input_handle tf.Output, element_shape tf.Output, element_dtype tf.DataType, optional ...TensorListStackAttr) (tensor tf.Output) { +// Returns the created operation. +func ResourceStridedSliceAssign(scope *Scope, ref tf.Output, begin tf.Output, end tf.Output, strides tf.Output, value tf.Output, optional ...ResourceStridedSliceAssignAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"element_dtype": element_dtype} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorListStack", + Type: "ResourceStridedSliceAssign", Input: []tf.Input{ - input_handle, element_shape, + ref, begin, end, strides, value, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform. -type StatelessRandomUniformAttr func(optionalAttr) +// ArgMaxAttr is an optional argument to ArgMax. +type ArgMaxAttr func(optionalAttr) -// StatelessRandomUniformDtype sets the optional dtype attribute to value. -// -// value: The type of the output. -// If not specified, defaults to DT_FLOAT -func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr { +// ArgMaxOutputType sets the optional output_type attribute to value. 
+// If not specified, defaults to DT_INT64 +func ArgMaxOutputType(value tf.DataType) ArgMaxAttr { return func(m optionalAttr) { - m["dtype"] = value + m["output_type"] = value } } -// Outputs deterministic pseudorandom random values from a uniform distribution. -// -// The generated values follow a uniform distribution in the range `[0, 1)`. The -// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// Returns the index with the largest value across dimensions of a tensor. // -// The outputs are a deterministic function of `shape` and `seed`. +// Note that in case of ties the identity of the return value is not guaranteed. // // Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). // -// Returns Random values with specified shape. -func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) { +// dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. +// Describes which dimension of the input Tensor to reduce across. For vectors, +// use dimension = 0. +func ArgMax(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMaxAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -16486,9 +15733,9 @@ func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optio a(attrs) } opspec := tf.OpSpec{ - Type: "StatelessRandomUniform", + Type: "ArgMax", Input: []tf.Input{ - shape, seed, + input, dimension, }, Attrs: attrs, } @@ -16496,196 +15743,92 @@ func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optio return op.Output(0) } -// Makes its input available to the next iteration. +// Fetches multiple values from infeed as an XLA tuple. // // Arguments: -// data: The tensor to be made available to the next iteration. +// dtypes: The element types of each element in `outputs`. +// shapes: The shapes of each tensor in `outputs`. 
// -// Returns The same tensor as `data`. -func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { +// Returns A list of tensors that will be provided using the infeed mechanism. +func InfeedDequeueTuple(scope *Scope, dtypes []tf.DataType, shapes []tf.Shape) (outputs []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtypes": dtypes, "shapes": shapes} opspec := tf.OpSpec{ - Type: "NextIteration", - Input: []tf.Input{ - data, - }, + Type: "InfeedDequeueTuple", + + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Output a fact about factorials. -func Fact(scope *Scope) (fact tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "Fact", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Deserialize `SparseTensor` objects. -// -// The input `serialized_sparse` must have the shape `[?, ?, ..., ?, 3]` where -// the last dimension stores serialized `SparseTensor` objects and the other N -// dimensions (N >= 0) correspond to a batch. The ranks of the original -// `SparseTensor` objects must all match. When the final `SparseTensor` is -// created, its rank is the rank of the incoming `SparseTensor` objects plus N; -// the sparse tensors have been concatenated along new dimensions, one for each -// batch. -// -// The output `SparseTensor` object's shape values for the original dimensions -// are the max across the input `SparseTensor` objects' shape values for the -// corresponding dimensions. The new dimensions match the size of the batch. -// -// The input `SparseTensor` objects' indices are assumed ordered in -// standard lexicographic order. If this is not the case, after this -// step run `SparseReorder` to restore index ordering. 
-// -// For example, if the serialized input is a `[2 x 3]` matrix representing two -// original `SparseTensor` objects: -// -// index = [ 0] -// [10] -// [20] -// values = [1, 2, 3] -// shape = [50] -// -// and -// -// index = [ 2] -// [10] -// values = [4, 5] -// shape = [30] -// -// then the final deserialized `SparseTensor` will be: -// -// index = [0 0] -// [0 10] -// [0 20] -// [1 2] -// [1 10] -// values = [1, 2, 3, 4, 5] -// shape = [2 50] -// -// Arguments: -// serialized_sparse: The serialized `SparseTensor` objects. The last dimension -// must have 3 columns. -// dtype: The `dtype` of the serialized `SparseTensor` objects. -func DeserializeSparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { - if scope.Err() != nil { + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("InfeedDequeueTuple", err) return } - attrs := map[string]interface{}{"dtype": dtype} - opspec := tf.OpSpec{ - Type: "DeserializeSparse", - Input: []tf.Input{ - serialized_sparse, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// SqueezeAttr is an optional argument to Squeeze. -type SqueezeAttr func(optionalAttr) - -// SqueezeAxis sets the optional axis attribute to value. -// -// value: If specified, only squeezes the dimensions listed. The dimension -// index starts at 0. It is an error to squeeze a dimension that is not 1. Must -// be in the range `[-rank(input), rank(input))`. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func SqueezeAxis(value []int64) SqueezeAttr { - return func(m optionalAttr) { - m["squeeze_dims"] = value - } + return outputs } -// Removes dimensions of size 1 from the shape of a tensor. -// -// Given a tensor `input`, this operation returns a tensor of the same type with -// all dimensions of size 1 removed. 
If you don't want to remove all size 1 -// dimensions, you can remove specific size 1 dimensions by specifying -// `axis`. -// -// For example: -// -// ``` -// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1] -// shape(squeeze(t)) ==> [2, 3] -// ``` -// -// Or, to remove specific size 1 dimensions: -// -// ``` -// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1] -// shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1] -// ``` +// Enqueue multiple Tensor values on the computation outfeed. // // Arguments: -// input: The `input` to squeeze. +// inputs: A list of tensors that will be inserted into the outfeed queue as an +// XLA tuple. // -// Returns Contains the same data as `input`, but has one or more dimensions of -// size 1 removed. -func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf.Output) { +// Returns the created operation. +func OutfeedEnqueueTuple(scope *Scope, inputs []tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Squeeze", + Type: "OutfeedEnqueueTuple", Input: []tf.Input{ - input, + tf.OutputList(inputs), }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// ResourceApplyAdadeltaAttr is an optional argument to ResourceApplyAdadelta. -type ResourceApplyAdadeltaAttr func(optionalAttr) +// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. +type ResourceApplyAdagradAttr func(optionalAttr) -// ResourceApplyAdadeltaUseLocking sets the optional use_locking attribute to value. +// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. // -// value: If True, updating of the var, accum and update_accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. 
+// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. // If not specified, defaults to false -func ResourceApplyAdadeltaUseLocking(value bool) ResourceApplyAdadeltaAttr { +func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' according to the adadelta scheme. +// ResourceApplyAdagradUpdateSlots sets the optional update_slots attribute to value. +// If not specified, defaults to true +func ResourceApplyAdagradUpdateSlots(value bool) ResourceApplyAdagradAttr { + return func(m optionalAttr) { + m["update_slots"] = value + } +} + +// Update '*var' according to the adagrad scheme. // -// accum = rho() * accum + (1 - rho()) * grad.square(); -// update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad; -// update_accum = rho() * update_accum + (1 - rho()) * update.square(); -// var -= update; +// accum += grad * grad +// var -= lr * grad * (1 / sqrt(accum)) // // Arguments: // var_: Should be from a Variable(). // accum: Should be from a Variable(). -// accum_update: Should be from a Variable(). // lr: Scaling factor. Must be a scalar. -// rho: Decay factor. Must be a scalar. -// epsilon: Constant factor. Must be a scalar. // grad: The gradient. // // Returns the created operation. 
-func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdadeltaAttr) (o *tf.Operation) { +func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -16694,58 +15837,108 @@ func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_ a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAdadelta", + Type: "ResourceApplyAdagrad", Input: []tf.Input{ - var_, accum, accum_update, lr, rho, epsilon, grad, + var_, accum, lr, grad, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// NonMaxSuppressionAttr is an optional argument to NonMaxSuppression. -type NonMaxSuppressionAttr func(optionalAttr) +// CudnnRNNV3Attr is an optional argument to CudnnRNNV3. +type CudnnRNNV3Attr func(optionalAttr) -// NonMaxSuppressionIouThreshold sets the optional iou_threshold attribute to value. -// -// value: A float representing the threshold for deciding whether boxes -// overlap too much with respect to IOU. -// If not specified, defaults to 0.5 -func NonMaxSuppressionIouThreshold(value float32) NonMaxSuppressionAttr { +// CudnnRNNV3RnnMode sets the optional rnn_mode attribute to value. +// If not specified, defaults to "lstm" +func CudnnRNNV3RnnMode(value string) CudnnRNNV3Attr { return func(m optionalAttr) { - m["iou_threshold"] = value + m["rnn_mode"] = value } } -// Greedily selects a subset of bounding boxes in descending order of score, -// -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. 
Note that this algorithm -// is agnostic to where the origin is in the coordinate system. Note that this -// algorithm is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. For example: -// selected_indices = tf.image.non_max_suppression( -// boxes, scores, max_output_size, iou_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) +// CudnnRNNV3InputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNV3InputMode(value string) CudnnRNNV3Attr { + return func(m optionalAttr) { + m["input_mode"] = value + } +} + +// CudnnRNNV3Direction sets the optional direction attribute to value. +// If not specified, defaults to "unidirectional" +func CudnnRNNV3Direction(value string) CudnnRNNV3Attr { + return func(m optionalAttr) { + m["direction"] = value + } +} + +// CudnnRNNV3Dropout sets the optional dropout attribute to value. +// If not specified, defaults to 0 +func CudnnRNNV3Dropout(value float32) CudnnRNNV3Attr { + return func(m optionalAttr) { + m["dropout"] = value + } +} + +// CudnnRNNV3Seed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func CudnnRNNV3Seed(value int64) CudnnRNNV3Attr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// CudnnRNNV3Seed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNV3Seed2(value int64) CudnnRNNV3Attr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// CudnnRNNV3IsTraining sets the optional is_training attribute to value. 
+// If not specified, defaults to true +func CudnnRNNV3IsTraining(value bool) CudnnRNNV3Attr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// A RNN backed by cuDNN. // -// Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. +// Computes the RNN from the input and initial states, with respect to the params +// buffer. Accepts one extra input "sequence_lengths" than CudnnRNN. // -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, optional ...NonMaxSuppressionAttr) (selected_indices tf.Output) { +// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicates whether there is a linear projection between the input and +// the actual computation before the first layer. 'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. +// direction: Indicates whether a bidirectional model will be used. Should be +// "unidirectional" or "bidirectional". +// dropout: Dropout probability. When set to 0., dropout is disabled. +// seed: The 1st part of a seed to initialize dropout. +// seed2: The 2nd part of a seed to initialize dropout. +// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size]. +// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size, +// num_units]. +// input_c: For LSTM, a 3-D tensor with the shape of +// [num_layer * dir, batch, num_units]. For other models, it is ignored. 
+// params: A 1-D tensor that contains the weights and biases in an opaque layout. +// The size must be created through CudnnRNNParamsSize, and initialized +// separately. Note that they might not be compatible across different +// generations. So it is a good idea to save and restore +// sequence_lengths: a vector of lengths of each input sequence. +// output: A 3-D tensor with the shape of [seq_length, batch_size, +// dir * num_units]. +// output_h: The same shape has input_h. +// output_c: The same shape as input_c for LSTM. An empty tensor for other models. +// is_training: Indicates whether this operation is used for inferenece or +// training. +// reserve_space: An opaque tensor that can be used in backprop calculation. It +// is only produced if is_training is true. +func CudnnRNNV3(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, sequence_lengths tf.Output, optional ...CudnnRNNV3Attr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output, host_reserved tf.Output) { if scope.Err() != nil { return } @@ -16754,201 +15947,234 @@ func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_outp a(attrs) } opspec := tf.OpSpec{ - Type: "NonMaxSuppression", + Type: "CudnnRNNV3", Input: []tf.Input{ - boxes, scores, max_output_size, + input, input_h, input_c, params, sequence_lengths, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// Creates a dataset that emits `components` as a tuple of tensors once. -func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { +// Applies softmax to a batched N-D `SparseTensor`. +// +// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` +// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. 
+// +// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost +// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly +// zero elements do not participate*. Specifically, the algorithm is equivalent +// to the following: +// +// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix +// with shape `[B, C]`, along the size-C dimension; +// (2) Masks out the original implicitly-zero locations; +// (3) Renormalizes the remaining elements. +// +// Hence, the `SparseTensor` result has exactly the same non-zero indices and +// shape. +// +// Arguments: +// sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a +// SparseTensor, in canonical ordering. +// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// +// Returns 1-D. The `NNZ` values for the result `SparseTensor`. +func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "TensorDataset", + Type: "SparseSoftmax", Input: []tf.Input{ - tf.OutputList(components), + sp_indices, sp_values, sp_shape, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Component-wise multiplies a SparseTensor by a dense Tensor. -// -// The output locations corresponding to the implicitly zero elements in the sparse -// tensor will be zero (i.e., will not take up storage space), regardless of the -// contents of the dense tensor (even if it's +/-INF and that INF*0 == NaN). -// -// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not -// the other direction. +// Creates a Tensor by indexing into the TensorList. // -// Arguments: -// sp_indices: 2-D. 
`N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. +// Each row in the produced Tensor corresponds to the element in the TensorList +// specified by the given index (see `tf.gather`). // -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseMul(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { +// input_handle: The input tensor list. +// indices: The indices used to index into the list. +// values: The tensor. +func TensorListGather(scope *Scope, input_handle tf.Output, indices tf.Output, element_shape tf.Output, element_dtype tf.DataType) (values tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"element_dtype": element_dtype} opspec := tf.OpSpec{ - Type: "SparseDenseCwiseMul", + Type: "TensorListGather", Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, + input_handle, indices, element_shape, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// 2D real-valued fast Fourier transform. +// FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2. +type FixedLengthRecordReaderV2Attr func(optionalAttr) + +// FixedLengthRecordReaderV2HeaderBytes sets the optional header_bytes attribute to value. // -// Computes the 2-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most 2 dimensions of `input`. +// value: Number of bytes in the header, defaults to 0. +// If not specified, defaults to 0 +func FixedLengthRecordReaderV2HeaderBytes(value int64) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["header_bytes"] = value + } +} + +// FixedLengthRecordReaderV2FooterBytes sets the optional footer_bytes attribute to value. 
// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the -// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension -// of `output`: the zero-frequency term, followed by the `fft_length / 2` -// positive-frequency terms. +// value: Number of bytes in the footer, defaults to 0. +// If not specified, defaults to 0 +func FixedLengthRecordReaderV2FooterBytes(value int64) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["footer_bytes"] = value + } +} + +// FixedLengthRecordReaderV2HopBytes sets the optional hop_bytes attribute to value. // -// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// value: Number of bytes to hop before each read. Default of 0 means using +// record_bytes. +// If not specified, defaults to 0 +func FixedLengthRecordReaderV2HopBytes(value int64) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["hop_bytes"] = value + } +} + +// FixedLengthRecordReaderV2Container sets the optional container attribute to value. // -// Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. +// value: If non-empty, this reader is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func FixedLengthRecordReaderV2Container(value string) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// FixedLengthRecordReaderV2SharedName sets the optional shared_name attribute to value. // -// Returns A complex64 tensor of the same rank as `input`. The inner-most 2 -// dimensions of `input` are replaced with their 2D Fourier transform. The -// inner-most dimension contains `fft_length / 2 + 1` unique frequency -// components. 
+// value: If non-empty, this reader is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. +// If not specified, defaults to "" +func FixedLengthRecordReaderV2SharedName(value string) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// FixedLengthRecordReaderV2Encoding sets the optional encoding attribute to value. // -// @compatibility(numpy) -// Equivalent to np.fft.rfft2 -// @end_compatibility -func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// value: The type of encoding for the file. Currently ZLIB and GZIP +// are supported. Defaults to none. +// If not specified, defaults to "" +func FixedLengthRecordReaderV2Encoding(value string) FixedLengthRecordReaderV2Attr { + return func(m optionalAttr) { + m["encoding"] = value + } +} + +// A Reader that outputs fixed-length records from a file. +// +// Arguments: +// record_bytes: Number of bytes in the record. +// +// Returns The handle to reference the Reader. +func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...FixedLengthRecordReaderV2Attr) (reader_handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"record_bytes": record_bytes} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "RFFT2D", - Input: []tf.Input{ - input, fft_length, - }, + Type: "FixedLengthRecordReaderV2", + + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Pads a tensor with zeros. -// -// This operation pads a `input` with zeros according to the `paddings` you -// specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the -// rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates -// how many zeros to add before the contents of `input` in that dimension, and -// `paddings[D, 1]` indicates how many zeros to add after the contents of `input` -// in that dimension. 
-// -// The padded size of each dimension D of the output is: -// -// `paddings(D, 0) + input.dim_size(D) + paddings(D, 1)` -// -// For example: -// -// ``` -// # 't' is [[1, 1], [2, 2]] -// # 'paddings' is [[1, 1], [2, 2]] -// # rank of 't' is 2 -// pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0] -// [0, 0, 1, 1, 0, 0] -// [0, 0, 2, 2, 0, 0] -// [0, 0, 0, 0, 0, 0]] -// ``` -// -func Pad(scope *Scope, input tf.Output, paddings tf.Output) (output tf.Output) { +// CompilationResultProto indicating the status of the TPU compilation. +func TPUCompilationResult(scope *Scope) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Pad", - Input: []tf.Input{ - input, paddings, - }, + Type: "TPUCompilationResult", } op := scope.AddOperation(opspec) return op.Output(0) } -// Checks whether a resource handle-based variable has been initialized. +// Retrieves the tree ensemble resource stamp token, number of trees and growing statistics. // // Arguments: -// resource: the input resource handle. +// tree_ensemble_handle: Handle to the tree ensemble. // -// Returns a scalar boolean which is true if the variable has been -// initialized. -func VarIsInitializedOp(scope *Scope, resource tf.Output) (is_initialized tf.Output) { +// Returns Stamp token of the tree ensemble resource.The number of trees in the tree ensemble resource.The number of trees that were finished successfully.The number of layers we attempted to build (but not necessarily succeeded).Rank size 2 tensor that contains start and end ids of the nodes in the latest +// layer. 
+func BoostedTreesGetEnsembleStates(scope *Scope, tree_ensemble_handle tf.Output) (stamp_token tf.Output, num_trees tf.Output, num_finalized_trees tf.Output, num_attempted_layers tf.Output, last_layer_nodes_range tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "VarIsInitializedOp", + Type: "BoostedTreesGetEnsembleStates", Input: []tf.Input{ - resource, + tree_ensemble_handle, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl. -type ResourceSparseApplyFtrlAttr func(optionalAttr) +// ResourceApplyPowerSignAttr is an optional argument to ResourceApplyPowerSign. +type ResourceApplyPowerSignAttr func(optionalAttr) -// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value. +// ResourceApplyPowerSignUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less +// value: If `True`, updating of the var and m tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less // contention. // If not specified, defaults to false -func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr { +func ResourceApplyPowerSignUseLocking(value bool) ResourceApplyPowerSignAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update relevant entries in '*var' according to the Ftrl-proximal scheme. +// Update '*var' according to the AddSign update. 
// -// That is for rows we have grad for, we update var, accum and linear as follows: -// accum_new = accum + grad * grad -// linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// update <- exp(logbase * sign_decay * sign(g) * sign(m_t)) * g +// variable <- variable - lr_t * update // // Arguments: // var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. +// m: Should be from a Variable(). // lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// lr_power: Scaling factor. Must be a scalar. +// logbase: Must be a scalar. +// sign_decay: Must be a scalar. +// beta: Must be a scalar. +// grad: The gradient. // // Returns the created operation. 
-func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) { +func ResourceApplyPowerSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, logbase tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyPowerSignAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -16957,469 +16183,495 @@ func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, line a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyFtrl", + Type: "ResourceApplyPowerSign", Input: []tf.Input{ - var_, accum, linear, grad, indices, lr, l1, l2, lr_power, + var_, m, lr, logbase, sign_decay, beta, grad, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Returns which elements of x are Inf. +// Deprecated. Use TensorArraySplitV3 // -// @compatibility(numpy) -// Equivalent to np.isinf -// @end_compatibility -func IsInf(scope *Scope, x tf.Output) (y tf.Output) { +// DEPRECATED at GraphDef version 26: Use TensorArraySplitV3 +func TensorArraySplitV2(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IsInf", + Type: "TensorArraySplitV2", Input: []tf.Input{ - x, + handle, value, lengths, flow_in, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// TruncatedNormalAttr is an optional argument to TruncatedNormal. -type TruncatedNormalAttr func(optionalAttr) - -// TruncatedNormalSeed sets the optional seed attribute to value. +// Reshapes a SparseTensor to represent values in a new dense shape. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. 
-// If not specified, defaults to 0 -func TruncatedNormalSeed(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// TruncatedNormalSeed2 sets the optional seed2 attribute to value. +// This operation has the same semantics as reshape on the represented dense +// tensor. The `input_indices` are recomputed based on the requested `new_shape`. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { - return func(m optionalAttr) { - m["seed2"] = value +// If one component of `new_shape` is the special value -1, the size of that +// dimension is computed so that the total dense size remains constant. At +// most one component of `new_shape` can be -1. The number of dense elements +// implied by `new_shape` must be the same as the number of dense elements +// originally implied by `input_shape`. +// +// Reshaping does not affect the order of values in the SparseTensor. +// +// If the input tensor has rank `R_in` and `N` non-empty values, and `new_shape` +// has length `R_out`, then `input_indices` has shape `[N, R_in]`, +// `input_shape` has length `R_in`, `output_indices` has shape `[N, R_out]`, and +// `output_shape` has length `R_out`. +// +// Arguments: +// input_indices: 2-D. `N x R_in` matrix with the indices of non-empty values in a +// SparseTensor. +// input_shape: 1-D. `R_in` vector with the input SparseTensor's dense shape. +// new_shape: 1-D. `R_out` vector with the requested new dense shape. +// +// Returns 2-D. `N x R_out` matrix with the updated indices of non-empty +// values in the output SparseTensor.1-D. `R_out` vector with the full dense shape of the output +// SparseTensor. This is the same as `new_shape` but with any -1 dimensions +// filled in. 
+func SparseReshape(scope *Scope, input_indices tf.Output, input_shape tf.Output, new_shape tf.Output) (output_indices tf.Output, output_shape tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseReshape", + Input: []tf.Input{ + input_indices, input_shape, new_shape, + }, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// Outputs random values from a truncated normal distribution. +// Computes the product along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. +// +// Computes a tensor such that +// \\(output_i = \prod_j data_j\\) where the product is over `j` such +// that `segment_ids[j] == i`. +// +// If the product is empty for a given segment ID `i`, `output[i] = 1`. +// +//
+// +//
+// +// For example: +// +// ``` +// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) +// tf.segment_prod(c, tf.constant([0, 0, 1])) +// # ==> [[4, 6, 6, 4], +// # [5, 6, 7, 8]] +// ``` // -// The generated values follow a normal distribution with mean 0 and standard -// deviation 1, except that values whose magnitude is more than 2 standard -// deviations from the mean are dropped and re-picked. // // Arguments: -// shape: The shape of the output tensor. -// dtype: The type of the output. // -// Returns A tensor of the specified shape filled with random truncated normal -// values. -func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "TruncatedNormal", + Type: "SegmentProd", Input: []tf.Input{ - shape, + data, segment_ids, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// SkipgramAttr is an optional argument to Skipgram. -type SkipgramAttr func(optionalAttr) - -// SkipgramWindowSize sets the optional window_size attribute to value. -// -// value: The number of words to predict to the left and right of the target. -// If not specified, defaults to 5 -func SkipgramWindowSize(value int64) SkipgramAttr { - return func(m optionalAttr) { - m["window_size"] = value - } -} +// RetrieveTPUEmbeddingFTRLParametersAttr is an optional argument to RetrieveTPUEmbeddingFTRLParameters. 
+type RetrieveTPUEmbeddingFTRLParametersAttr func(optionalAttr) -// SkipgramMinCount sets the optional min_count attribute to value. +// RetrieveTPUEmbeddingFTRLParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: The minimum number of word occurrences for it to be included in the -// vocabulary. -// If not specified, defaults to 5 -func SkipgramMinCount(value int64) SkipgramAttr { +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingFTRLParametersTableId(value int64) RetrieveTPUEmbeddingFTRLParametersAttr { return func(m optionalAttr) { - m["min_count"] = value + m["table_id"] = value } } -// SkipgramSubsample sets the optional subsample attribute to value. -// -// value: Threshold for word occurrence. Words that appear with higher -// frequency will be randomly down-sampled. Set to 0 to disable. -// If not specified, defaults to 0.001 -func SkipgramSubsample(value float32) SkipgramAttr { +// RetrieveTPUEmbeddingFTRLParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingFTRLParametersTableName(value string) RetrieveTPUEmbeddingFTRLParametersAttr { return func(m optionalAttr) { - m["subsample"] = value + m["table_name"] = value } } -// Parses a text file and creates a batch of examples. +// Retrieve FTRL embedding parameters. // -// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result -// -// Arguments: -// filename: The corpus's text file name. -// batch_size: The size of produced batch. +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // -// Returns A vector of words in the corpus.Frequencies of words. 
Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids. -func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { +// Returns Parameter parameters updated by the FTRL optimization algorithm.Parameter accumulators updated by the FTRL optimization algorithm.Parameter linears updated by the FTRL optimization algorithm. +func RetrieveTPUEmbeddingFTRLParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingFTRLParametersAttr) (parameters tf.Output, accumulators tf.Output, linears tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Skipgram", + Type: "RetrieveTPUEmbeddingFTRLParameters", Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) -} - -// StringToNumberAttr is an optional argument to StringToNumber. -type StringToNumberAttr func(optionalAttr) - -// StringToNumberOutType sets the optional out_type attribute to value. -// -// value: The numeric type to interpret each string in `string_tensor` as. -// If not specified, defaults to DT_FLOAT -func StringToNumberOutType(value tf.DataType) StringToNumberAttr { - return func(m optionalAttr) { - m["out_type"] = value - } + return op.Output(0), op.Output(1), op.Output(2) } -// Converts each string in the input Tensor to the specified numeric type. 
-// -// (Note that int32 overflow results in an error while float overflow -// results in a rounded value.) -// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { +// Connects outputs of an N-way replicated computation to N outputs. +func TPUReplicatedOutput(scope *Scope, input tf.Output, num_replicas int64) (outputs []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"num_replicas": num_replicas} opspec := tf.OpSpec{ - Type: "StringToNumber", + Type: "TPUReplicatedOutput", Input: []tf.Input{ - string_tensor, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("TPUReplicatedOutput", err) + return + } + return outputs } -// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. -type ResourceApplyFtrlV2Attr func(optionalAttr) +// LoadTPUEmbeddingFTRLParametersAttr is an optional argument to LoadTPUEmbeddingFTRLParameters. +type LoadTPUEmbeddingFTRLParametersAttr func(optionalAttr) -// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// LoadTPUEmbeddingFTRLParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. 
-// If not specified, defaults to false -func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingFTRLParametersTableId(value int64) LoadTPUEmbeddingFTRLParametersAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["table_id"] = value } } -// Update '*var' according to the Ftrl-proximal scheme. +// LoadTPUEmbeddingFTRLParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingFTRLParametersTableName(value string) LoadTPUEmbeddingFTRLParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load FTRL embedding parameters. // -// grad_with_shrinkage = grad + 2 * l2_shrinkage * var -// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage -// linear += grad_with_shrinkage + -// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -// accum = accum_new +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// linear: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regulariation. Must be a scalar. -// l2: L2 shrinkage regulariation. Must be a scalar. +// parameters: Value of parameters used in the FTRL optimization algorithm. +// accumulators: Value of accumulators used in the FTRL optimization algorithm. +// linears: Value of linears used in the FTRL optimization algorithm. +// // -// lr_power: Scaling factor. Must be a scalar. 
// // Returns the created operation. -func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { +func LoadTPUEmbeddingFTRLParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, linears tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingFTRLParametersAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyFtrlV2", + Type: "LoadTPUEmbeddingFTRLParameters", Input: []tf.Input{ - var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, + parameters, accumulators, linears, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// EncodeJpegAttr is an optional argument to EncodeJpeg. -type EncodeJpegAttr func(optionalAttr) - -// EncodeJpegFormat sets the optional format attribute to value. +// Returns (x - y)(x - y) element-wise. // -// value: Per pixel image format. -// If not specified, defaults to "" -func EncodeJpegFormat(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["format"] = value +// *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return } -} - -// EncodeJpegQuality sets the optional quality attribute to value. -// -// value: Quality of the compression from 0 to 100 (higher is better and slower). 
-// If not specified, defaults to 95 -func EncodeJpegQuality(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["quality"] = value + opspec := tf.OpSpec{ + Type: "SquaredDifference", + Input: []tf.Input{ + x, y, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// EncodeJpegProgressive sets the optional progressive attribute to value. +// Push an element onto the tensor_array. // -// value: If True, create a JPEG that loads progressively (coarse to fine). -// If not specified, defaults to false -func EncodeJpegProgressive(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["progressive"] = value - } -} - -// EncodeJpegOptimizeSize sets the optional optimize_size attribute to value. +// Arguments: +// handle: The handle to a TensorArray. +// index: The position to write to inside the TensorArray. +// value: The tensor to write to the TensorArray. +// flow_in: A float scalar that enforces proper chaining of operations. // -// value: If True, spend CPU/RAM to reduce size with no quality change. -// If not specified, defaults to false -func EncodeJpegOptimizeSize(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["optimize_size"] = value +// Returns A float scalar that enforces proper chaining of operations. +func TensorArrayWriteV3(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { + if scope.Err() != nil { + return } -} - -// EncodeJpegChromaDownsampling sets the optional chroma_downsampling attribute to value. -// -// value: See http://en.wikipedia.org/wiki/Chroma_subsampling. 
-// If not specified, defaults to true -func EncodeJpegChromaDownsampling(value bool) EncodeJpegAttr { - return func(m optionalAttr) { - m["chroma_downsampling"] = value + opspec := tf.OpSpec{ + Type: "TensorArrayWriteV3", + Input: []tf.Input{ + handle, index, value, flow_in, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// EncodeJpegDensityUnit sets the optional density_unit attribute to value. +// RetrieveTPUEmbeddingAdagradParametersAttr is an optional argument to RetrieveTPUEmbeddingAdagradParameters. +type RetrieveTPUEmbeddingAdagradParametersAttr func(optionalAttr) + +// RetrieveTPUEmbeddingAdagradParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: Unit used to specify `x_density` and `y_density`: -// pixels per inch (`'in'`) or centimeter (`'cm'`). -// If not specified, defaults to "in" -func EncodeJpegDensityUnit(value string) EncodeJpegAttr { +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingAdagradParametersTableId(value int64) RetrieveTPUEmbeddingAdagradParametersAttr { return func(m optionalAttr) { - m["density_unit"] = value + m["table_id"] = value } } -// EncodeJpegXDensity sets the optional x_density attribute to value. -// -// value: Horizontal pixels per density unit. -// If not specified, defaults to 300 -func EncodeJpegXDensity(value int64) EncodeJpegAttr { +// RetrieveTPUEmbeddingAdagradParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingAdagradParametersTableName(value string) RetrieveTPUEmbeddingAdagradParametersAttr { return func(m optionalAttr) { - m["x_density"] = value + m["table_name"] = value } } -// EncodeJpegYDensity sets the optional y_density attribute to value. +// Retrieve Adagrad embedding parameters. // -// value: Vertical pixels per density unit. 
-// If not specified, defaults to 300 -func EncodeJpegYDensity(value int64) EncodeJpegAttr { - return func(m optionalAttr) { - m["y_density"] = value +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns Parameter parameters updated by the Adagrad optimization algorithm.Parameter accumulators updated by the Adagrad optimization algorithm. +func RetrieveTPUEmbeddingAdagradParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdagradParametersAttr) (parameters tf.Output, accumulators tf.Output) { + if scope.Err() != nil { + return } -} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RetrieveTPUEmbeddingAdagradParameters", -// EncodeJpegXmpMetadata sets the optional xmp_metadata attribute to value. -// -// value: If not empty, embed this XMP metadata in the image header. -// If not specified, defaults to "" -func EncodeJpegXmpMetadata(value string) EncodeJpegAttr { - return func(m optionalAttr) { - m["xmp_metadata"] = value + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// JPEG-encode an image. +// Compare values of `input` to `threshold` and pack resulting bits into a `uint8`. // -// `image` is a 3-D uint8 Tensor of shape `[height, width, channels]`. +// Each comparison returns a boolean `true` (if `input_value > threshold`) +// or and `false` otherwise. // -// The attr `format` can be used to override the color format of the encoded -// output. 
Values can be: +// This operation is useful for Locality-Sensitive-Hashing (LSH) and other +// algorithms that use hashing approximations of cosine and `L2` distances; +// codes can be generated from an input via: // -// * `''`: Use a default format based on the number of channels in the image. -// * `grayscale`: Output a grayscale JPEG image. The `channels` dimension -// of `image` must be 1. -// * `rgb`: Output an RGB JPEG image. The `channels` dimension -// of `image` must be 3. +// ```python +// codebook_size = 50 +// codebook_bits = codebook_size * 32 +// codebook = tf.get_variable('codebook', [x.shape[-1].value, codebook_bits], +// dtype=x.dtype, +// initializer=tf.orthogonal_initializer()) +// codes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.) +// codes = tf.bitcast(codes, tf.int32) # go from uint8 to int32 +// # now codes has shape x.shape[:-1] + [codebook_size] +// ``` // -// If `format` is not specified or is the empty string, a default format is picked -// in function of the number of channels in `image`: +// **NOTE**: Currently, the innermost dimension of the tensor must be divisible +// by 8. // -// * 1: Output a grayscale image. -// * 3: Output an RGB image. +// Given an `input` shaped `[s0, s1, ..., s_n]`, the output is +// a `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`. // // Arguments: -// image: 3-D with shape `[height, width, channels]`. +// input: Values to compare against `threshold` and bitpack. +// threshold: Threshold to compare against. // -// Returns 0-D. JPEG-encoded image. -func EncodeJpeg(scope *Scope, image tf.Output, optional ...EncodeJpegAttr) (contents tf.Output) { +// Returns The bitpacked comparisons. 
+func CompareAndBitpack(scope *Scope, input tf.Output, threshold tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "EncodeJpeg", + Type: "CompareAndBitpack", Input: []tf.Input{ - image, + input, threshold, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// MultinomialAttr is an optional argument to Multinomial. -type MultinomialAttr func(optionalAttr) +// QuantizeAndDequantizeV2Attr is an optional argument to QuantizeAndDequantizeV2. +type QuantizeAndDequantizeV2Attr func(optionalAttr) -// MultinomialSeed sets the optional seed attribute to value. +// QuantizeAndDequantizeV2SignedInput sets the optional signed_input attribute to value. // -// value: If either seed or seed2 is set to be non-zero, the internal random number -// generator is seeded by the given seed. Otherwise, a random seed is used. -// If not specified, defaults to 0 -func MultinomialSeed(value int64) MultinomialAttr { +// value: Whether the quantization is signed or unsigned. (actually this parameter should +// have been called `signed_output`) +// If not specified, defaults to true +func QuantizeAndDequantizeV2SignedInput(value bool) QuantizeAndDequantizeV2Attr { return func(m optionalAttr) { - m["seed"] = value + m["signed_input"] = value } } -// MultinomialSeed2 sets the optional seed2 attribute to value. +// QuantizeAndDequantizeV2NumBits sets the optional num_bits attribute to value. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func MultinomialSeed2(value int64) MultinomialAttr { +// value: The bitwidth of the quantization. +// If not specified, defaults to 8 +func QuantizeAndDequantizeV2NumBits(value int64) QuantizeAndDequantizeV2Attr { return func(m optionalAttr) { - m["seed2"] = value + m["num_bits"] = value } } -// MultinomialOutputDtype sets the optional output_dtype attribute to value. 
-// If not specified, defaults to DT_INT64 -func MultinomialOutputDtype(value tf.DataType) MultinomialAttr { +// QuantizeAndDequantizeV2RangeGiven sets the optional range_given attribute to value. +// +// value: Whether the range is given or should be determined from the `input` tensor. +// If not specified, defaults to false +func QuantizeAndDequantizeV2RangeGiven(value bool) QuantizeAndDequantizeV2Attr { return func(m optionalAttr) { - m["output_dtype"] = value + m["range_given"] = value } } -// Draws samples from a multinomial distribution. +// QuantizeAndDequantizeV2RoundMode sets the optional round_mode attribute to value. // -// Arguments: -// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` -// represents the unnormalized log probabilities for all classes. -// num_samples: 0-D. Number of independent samples to draw for each row slice. +// value: The 'round_mode' attribute controls which rounding tie-breaking algorithm is +// used when rounding float values to their quantized equivalents. The following +// rounding modes are currently supported: // -// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` -// contains the drawn class labels with range `[0, num_classes)`. -func Multinomial(scope *Scope, logits tf.Output, num_samples tf.Output, optional ...MultinomialAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Multinomial", - Input: []tf.Input{ - logits, num_samples, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceSparseApplyAdagradDAAttr is an optional argument to ResourceSparseApplyAdagradDA. -type ResourceSparseApplyAdagradDAAttr func(optionalAttr) - -// ResourceSparseApplyAdagradDAUseLocking sets the optional use_locking attribute to value. +// * HALF_TO_EVEN: this is the default round_mode. 
+// * HALF_UP: round towards positive. In this mode 7.5 rounds up to 8 and -7.5 +// rounds up to -7. // -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyAdagradDAUseLocking(value bool) ResourceSparseApplyAdagradDAAttr { +// If not specified, defaults to "HALF_TO_EVEN" +func QuantizeAndDequantizeV2RoundMode(value string) QuantizeAndDequantizeV2Attr { return func(m optionalAttr) { - m["use_locking"] = value + m["round_mode"] = value } } -// Update entries in '*var' and '*accum' according to the proximal adagrad scheme. +// Quantizes then dequantizes a tensor. // -// Arguments: -// var_: Should be from a Variable(). -// gradient_accumulator: Should be from a Variable(). -// gradient_squared_accumulator: Should be from a Variable(). -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. -// lr: Learning rate. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// global_step: Training step number. Must be a scalar. +// This op simulates the precision loss from the quantized forward pass by: // -// Returns the created operation. -func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceSparseApplyAdagradDAAttr) (o *tf.Operation) { +// 1. Quantizing the tensor to fixed point numbers, which should match the target +// quantization method when it is used in inference. +// 2. Dequantizing it back to floating point numbers for the following ops, most +// likely matmul. +// +// There are different ways to quantize. This version uses only scaling, so 0.0 +// maps to 0. 
+// +// From the specified 'num_bits' in the quantized output type, it determines +// minimum and maximum representable quantized values. +// +// e.g. +// +// * [-128, 127] for signed, num_bits = 8, or +// * [0, 255] for unsigned, num_bits = 8. +// +// If range_given == False, the initial input_min, input_max will be determined +// automatically as the minimum and maximum values in the input tensor, otherwise +// the specified values of input_min, input_max are used. +// +// Note: If the input_min, input_max are specified, they do not need to equal the +// actual minimum and maximum values in the tensor. e.g. in some cases it may be +// beneficial to specify these values such that the low probability extremes of the +// input distribution are clipped. +// +// This op determines the maximum scale_factor that would map the initial +// [input_min, input_max] range to a range that lies within the representable +// quantized range. +// +// It determines the scale from one of input_min and input_max, then updates the +// other one to maximize the respresentable range. +// +// e.g. +// +// * if the output is signed, num_bits = 8, [input_min, input_max] = [-10.0, +// 5.0]: it would use a scale_factor of -128 / -10.0 = 12.8 In this case, it +// would update input_max to be 127 / 12.8 = 9.921875 +// * if the output is signed, num_bits = 8, [input_min, input_max] = [-10.0, +// 10.0]: it would use a scale_factor of 127 / 10.0 = 12.7 In this case, it +// would update input_min to be 128.0 / 12.7 = -10.07874 +// * if the output is unsigned, input_min is forced to be 0, and only the +// specified input_max is used. +// +// After determining the scale_factor and updating the input range, it applies the +// following to each value in the 'input' tensor. +// +// output = round(clamp(value, input_min, input_max) * scale_factor) / scale_factor. +// +// The above round function rounds the value based on the given round_mode. 
+// +// +// Arguments: +// input: Tensor to quantize and then dequantize. +// input_min: If `range_given == True`, this specifies the minimum input value that needs to +// be represented, otherwise it is determined from the min value of the `input` +// tensor. +// input_max: If `range_given == True`, this specifies the maximum input value that needs to +// be represented, otherwise it is determined from the max value of the `input` +// tensor. +func QuantizeAndDequantizeV2(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, optional ...QuantizeAndDequantizeV2Attr) (output tf.Output) { if scope.Err() != nil { return } @@ -17428,350 +16680,301 @@ func ResourceSparseApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumul a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyAdagradDA", + Type: "QuantizeAndDequantizeV2", Input: []tf.Input{ - var_, gradient_accumulator, gradient_squared_accumulator, grad, indices, lr, l1, l2, global_step, + input, input_min, input_max, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Converts each string in the input Tensor to its hash mod by a number of buckets. -// -// The hash function is deterministic on the content of the string within the -// process and will never change. However, it is not suitable for cryptography. -// This function may be used when CPU time is scarce and inputs are trusted or -// unimportant. There is a risk of adversaries constructing inputs that all hash -// to the same bucket. To prevent this problem, use a strong hash function with -// `tf.string_to_hash_bucket_strong`. +// A TPU core selector Op. // -// Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. +// This Op produces a set of TPU cores (for warm-up) or a single TPU core +// (for regular inference) to execute the TPU program on. The output is +// consumed by TPUPartitionedCall. 
// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { +// Returns A vector 1 or more TPU cores. +func TPUOrdinalSelector(scope *Scope) (device_ordinals tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_buckets": num_buckets} opspec := tf.OpSpec{ - Type: "StringToHashBucketFast", - Input: []tf.Input{ - input, - }, - Attrs: attrs, + Type: "TPUOrdinalSelector", } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the last element of the input list as well as a list with all but that element. +// Looks up keys in a table, outputs the corresponding values. // -// Fails if the list is empty. +// The tensor `keys` must of the same type as the keys of the table. +// The output `values` is of the type of the table values. // -// input_handle: the input list -// tensor: the withdrawn last element of the list -// element_dtype: the type of elements in the list -// element_shape: the shape of the output tensor -func TensorListPopBack(scope *Scope, input_handle tf.Output, element_shape tf.Output, element_dtype tf.DataType) (output_handle tf.Output, tensor tf.Output) { +// The scalar `default_value` is the value output for keys not present in the +// table. It must also be of the same type as the table values. +// +// Arguments: +// table_handle: Handle to the table. +// keys: Any shape. Keys to look up. +// +// +// Returns Same shape as `keys`. Values found in the table, or `default_values` +// for missing keys. 
+func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"element_dtype": element_dtype} opspec := tf.OpSpec{ - Type: "TensorListPopBack", + Type: "LookupTableFindV2", Input: []tf.Input{ - input_handle, element_shape, + table_handle, keys, default_value, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad. -type MaxPoolGradGradAttr func(optionalAttr) +// ResourceSparseApplyRMSPropAttr is an optional argument to ResourceSparseApplyRMSProp. +type ResourceSparseApplyRMSPropAttr func(optionalAttr) -// MaxPoolGradGradDataFormat sets the optional data_format attribute to value. +// ResourceSparseApplyRMSPropUseLocking sets the optional use_locking attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr { +// value: If `True`, updating of the var, ms, and mom tensors is protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyRMSPropUseLocking(value bool) ResourceSparseApplyRMSPropAttr { return func(m optionalAttr) { - m["data_format"] = value + m["use_locking"] = value } } -// Computes second-order gradients of the maxpooling function. +// Update '*var' according to the RMSProp algorithm. 
+// +// Note that in dense implementation of this algorithm, ms and mom will +// update even if the grad is zero, but in this sparse implementation, ms +// and mom will not update in iterations during which the grad is zero. +// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) +// +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom // // Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// var_: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// rho: Decay rate. Must be a scalar. // -// Returns Gradients of gradients w.r.t. the input to `max_pool`. -func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) { +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var, ms and mom. +// +// Returns the created operation. 
+func ResourceSparseApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyRMSPropAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolGradGrad", + Type: "ResourceSparseApplyRMSProp", Input: []tf.Input{ - orig_input, orig_output, grad, + var_, ms, mom, lr, rho, momentum, epsilon, grad, indices, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. -type TensorArrayGatherV3Attr func(optionalAttr) - -// TensorArrayGatherV3ElementShape sets the optional element_shape attribute to value. -// -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayGatherV3ElementShape(value tf.Shape) TensorArrayGatherV3Attr { - return func(m optionalAttr) { - m["element_shape"] = value - } + return scope.AddOperation(opspec) } -// Gather specific elements from the TensorArray into output `value`. -// -// All elements selected by `indices` must have the same shape. -// -// Arguments: -// handle: The handle to a TensorArray. -// indices: The locations in the TensorArray from which to read tensor elements. -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. +// Returns the truth value of (x > y) element-wise. // -// Returns All of the elements in the TensorArray, concatenated along a new -// axis (the new dimension 0). 
-func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayGatherV3Attr) (value tf.Output) { +// *NOTE*: `Greater` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "TensorArrayGatherV3", + Type: "Greater", Input: []tf.Input{ - handle, indices, flow_in, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns x / y element-wise for integer types. +// Creates a TensorList by indexing into a Tensor. // -// Truncation designates that negative numbers will round fractional quantities -// toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different -// than Python semantics. See `FloorDiv` for a division function that matches -// Python Semantics. +// Each member of the TensorList corresponds to one row of the input tensor, +// specified by the given index (see `tf.gather`). // -// *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func TruncateDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// tensor: The input tensor. +// indices: The indices used to index into the list. +// element_shape: The shape of the elements in the list (can be less specified than +// the shape of the tensor). +// num_elements: The size of the output list. Must be large enough to accommodate +// the largest index in indices. If -1, the list is just large enough to include +// the largest index in indices. +// output_handle: The TensorList. 
+func TensorListScatterV2(scope *Scope, tensor tf.Output, indices tf.Output, element_shape tf.Output, num_elements tf.Output) (output_handle tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TruncateDiv", + Type: "TensorListScatterV2", Input: []tf.Input{ - x, y, + tensor, indices, element_shape, num_elements, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// RequantizePerChannelAttr is an optional argument to RequantizePerChannel. -type RequantizePerChannelAttr func(optionalAttr) +// SampleDistortedBoundingBoxAttr is an optional argument to SampleDistortedBoundingBox. +type SampleDistortedBoundingBoxAttr func(optionalAttr) -// RequantizePerChannelOutType sets the optional out_type attribute to value. +// SampleDistortedBoundingBoxSeed sets the optional seed attribute to value. // -// value: The quantized type of output tensor that needs to be converted. -// If not specified, defaults to DT_QUINT8 -func RequantizePerChannelOutType(value tf.DataType) RequantizePerChannelAttr { +// value: If either `seed` or `seed2` are set to non-zero, the random number +// generator is seeded by the given `seed`. Otherwise, it is seeded by a random +// seed. +// If not specified, defaults to 0 +func SampleDistortedBoundingBoxSeed(value int64) SampleDistortedBoundingBoxAttr { return func(m optionalAttr) { - m["out_type"] = value + m["seed"] = value } } -// Requantizes input with min and max values known per channel. -// -// Arguments: -// input: The original input tensor. -// input_min: The minimum value of the input tensor -// input_max: The maximum value of the input tensor. -// requested_output_min: The minimum value of the output tensor requested. -// requested_output_max: The maximum value of the output tensor requested. +// SampleDistortedBoundingBoxSeed2 sets the optional seed2 attribute to value. // -// Returns Output tensor.The minimum value of the final output tensorThe maximum value of the final output tensor. 
-func RequantizePerChannel(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, optional ...RequantizePerChannelAttr) (output tf.Output, output_min tf.Output, output_max tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RequantizePerChannel", - Input: []tf.Input{ - input, input_min, input_max, requested_output_min, requested_output_max, - }, - Attrs: attrs, +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func SampleDistortedBoundingBoxSeed2(value int64) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["seed2"] = value } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) } -// Restores tensors from a V2 checkpoint. -// -// For backward compatibility with the V1 format, this Op currently allows -// restoring from a V1 checkpoint as well: -// - This Op first attempts to find the V2 index file pointed to by "prefix", and -// if found proceed to read it as a V2 checkpoint; -// - Otherwise the V1 read path is invoked. -// Relying on this behavior is not recommended, as the ability to fall back to read -// V1 might be deprecated and eventually removed. -// -// By default, restores the named tensors in full. If the caller wishes to restore -// specific slices of stored tensors, "shape_and_slices" should be non-empty -// strings and correspondingly well-formed. -// -// Callers must ensure all the named tensors are indeed stored in the checkpoint. -// -// Arguments: -// prefix: Must have a single element. The prefix of a V2 checkpoint. -// tensor_names: shape {N}. The names of the tensors to be restored. -// shape_and_slices: shape {N}. The slice specs of the tensors to be restored. -// Empty strings indicate that they are non-partitioned tensors. -// dtypes: shape {N}. 
The list of expected dtype for the tensors. Must match -// those stored in the checkpoint. +// SampleDistortedBoundingBoxMinObjectCovered sets the optional min_object_covered attribute to value. // -// Returns shape {N}. The restored tensors, whose shapes are read from the -// checkpoint directly. -func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, dtypes []tf.DataType) (tensors []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - opspec := tf.OpSpec{ - Type: "RestoreV2", - Input: []tf.Input{ - prefix, tensor_names, shape_and_slices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if tensors, idx, err = makeOutputList(op, idx, "tensors"); err != nil { - scope.UpdateErr("RestoreV2", err) - return +// value: The cropped area of the image must contain at least this +// fraction of any bounding box supplied. The value of this parameter should be +// non-negative. In the case of 0, the cropped area does not need to overlap +// any of the bounding boxes supplied. +// If not specified, defaults to 0.1 +func SampleDistortedBoundingBoxMinObjectCovered(value float32) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["min_object_covered"] = value } - return tensors } -// Receives a tensor value broadcast from another device. -func CollectiveBcastRecv(scope *Scope, T tf.DataType, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) { - if scope.Err() != nil { - return +// SampleDistortedBoundingBoxAspectRatioRange sets the optional aspect_ratio_range attribute to value. +// +// value: The cropped area of the image must have an aspect ratio = +// width / height within this range. 
+// If not specified, defaults to +func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["aspect_ratio_range"] = value } - attrs := map[string]interface{}{"T": T, "group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape} - opspec := tf.OpSpec{ - Type: "CollectiveBcastRecv", +} - Attrs: attrs, +// SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. +// +// value: The cropped area of the image must contain a fraction of the +// supplied image within this range. +// If not specified, defaults to +func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["area_range"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Decode web-safe base64-encoded strings. -// -// Input may or may not have padding at the end. See EncodeBase64 for padding. -// Web-safe means that input must use - and _ instead of + and /. -// -// Arguments: -// input: Base64 strings to decode. +// SampleDistortedBoundingBoxMaxAttempts sets the optional max_attempts attribute to value. // -// Returns Decoded strings. -func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DecodeBase64", - Input: []tf.Input{ - input, - }, +// value: Number of attempts at generating a cropped region of the image +// of the specified constraints. After `max_attempts` failures, return the entire +// image. +// If not specified, defaults to 100 +func SampleDistortedBoundingBoxMaxAttempts(value int64) SampleDistortedBoundingBoxAttr { + return func(m optionalAttr) { + m["max_attempts"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// ResourceSparseApplyProximalAdagradAttr is an optional argument to ResourceSparseApplyProximalAdagrad. 
-type ResourceSparseApplyProximalAdagradAttr func(optionalAttr) - -// ResourceSparseApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. +// SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. // -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. +// value: Controls behavior if no bounding boxes supplied. +// If true, assume an implicit bounding box covering the whole input. If false, +// raise an error. // If not specified, defaults to false -func ResourceSparseApplyProximalAdagradUseLocking(value bool) ResourceSparseApplyProximalAdagradAttr { +func SampleDistortedBoundingBoxUseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["use_image_if_no_bounding_boxes"] = value } } -// Sparse update entries in '*var' and '*accum' according to FOBOS algorithm. +// Generate a single randomly distorted bounding box for an image. // -// That is for rows we have grad for, we update var and accum as follows: -// accum += grad * grad -// prox_v = var -// prox_v -= lr * grad * (1 / sqrt(accum)) -// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} +// Bounding box annotations are often supplied in addition to ground-truth labels +// in image recognition or object localization tasks. A common technique for +// training such a system is to randomly distort an image while preserving +// its content, i.e. *data augmentation*. This Op outputs a randomly distorted +// localization of an object, i.e. bounding box, given an `image_size`, +// `bounding_boxes` and a series of constraints. +// +// The output of this Op is a single bounding box that may be used to crop the +// original image. The output is returned as 3 tensors: `begin`, `size` and +// `bboxes`. 
The first 2 tensors can be fed directly into `tf.slice` to crop the +// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize +// what the bounding box looks like. +// +// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The +// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and +// height of the underlying image. +// +// For example, +// +// ```python +// # Generate a single distorted bounding box. +// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( +// tf.shape(image), +// bounding_boxes=bounding_boxes) +// +// # Draw the bounding box in an image summary. +// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), +// bbox_for_draw) +// tf.summary.image('images_with_box', image_with_box) +// +// # Employ the bounding box to distort the image. +// distorted_image = tf.slice(image, begin, size) +// ``` +// +// Note that if no bounding box information is available, setting +// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit +// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is +// false and no bounding boxes are supplied, an error is raised. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Learning rate. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. +// image_size: 1-D, containing `[height, width, channels]`. +// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes +// associated with the image. // -// Returns the created operation. 
-func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalAdagradAttr) (o *tf.Operation) { +// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to +// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to +// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box. +// Provide as input to `tf.image.draw_bounding_boxes`. +func SampleDistortedBoundingBox(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, optional ...SampleDistortedBoundingBoxAttr) (begin tf.Output, size tf.Output, bboxes tf.Output) { if scope.Err() != nil { return } @@ -17780,192 +16983,263 @@ func ResourceSparseApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.O a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyProximalAdagrad", + Type: "SampleDistortedBoundingBox", Input: []tf.Input{ - var_, accum, lr, l1, l2, grad, indices, + image_size, bounding_boxes, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// MaxPool3DGradAttr is an optional argument to MaxPool3DGrad. -type MaxPool3DGradAttr func(optionalAttr) +// ResourceScatterNdUpdateAttr is an optional argument to ResourceScatterNdUpdate. +type ResourceScatterNdUpdateAttr func(optionalAttr) -// MaxPool3DGradDataFormat sets the optional data_format attribute to value. +// ResourceScatterNdUpdateUseLocking sets the optional use_locking attribute to value. // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. 
-// If not specified, defaults to "NDHWC" -func MaxPool3DGradDataFormat(value string) MaxPool3DGradAttr { +// value: An optional bool. Defaults to True. If True, the assignment will +// be protected by a lock; otherwise the behavior is undefined, +// but may exhibit less contention. +// If not specified, defaults to true +func ResourceScatterNdUpdateUseLocking(value bool) ResourceScatterNdUpdateAttr { return func(m optionalAttr) { - m["data_format"] = value + m["use_locking"] = value } } -// Computes gradients of max pooling function. +// Applies sparse `updates` to individual values or slices within a given // -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: Output backprop of shape `[batch, depth, rows, cols, channels]`. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func MaxPool3DGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DGradAttr) (output tf.Output) { +// variable according to `indices`. +// +// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. +// +// `indices` must be integer tensor, containing indices into `ref`. +// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. +// +// The innermost dimension of `indices` (with length `K`) corresponds to +// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th +// dimension of `ref`. +// +// `updates` is `Tensor` of rank `Q-1+P-K` with shape: +// +// ``` +// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. 
+// ``` +// +// For example, say we want to update 4 scattered elements to a rank-1 tensor to +// 8 elements. In Python, that update would look like this: +// +// ```python +// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8]) +// indices = tf.constant([[4], [3], [1] ,[7]]) +// updates = tf.constant([9, 10, 11, 12]) +// update = tf.scatter_nd_update(ref, indices, updates) +// with tf.Session() as sess: +// print sess.run(update) +// ``` +// +// The resulting update to ref would look like this: +// +// [1, 11, 3, 10, 9, 6, 7, 12] +// +// See `tf.scatter_nd` for more details about how to make updates to +// slices. +// +// Arguments: +// ref: A resource handle. Must be from a VarHandleOp. +// indices: A Tensor. Must be one of the following types: int32, int64. +// A tensor of indices into ref. +// updates: A Tensor. Must have the same type as ref. A tensor of updated +// values to add to ref. +// +// Returns the created operation. +func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdUpdateAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPool3DGrad", + Type: "ResourceScatterNdUpdate", Input: []tf.Input{ - orig_input, orig_output, grad, + ref, indices, updates, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Returns the name of the device on which `resource` has been placed. -func ExperimentalIteratorGetDevice(scope *Scope, resource tf.Output) (device tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ExperimentalIteratorGetDevice", - Input: []tf.Input{ - resource, - }, +// UnicodeDecodeWithOffsetsAttr is an optional argument to UnicodeDecodeWithOffsets. 
+type UnicodeDecodeWithOffsetsAttr func(optionalAttr) + +// UnicodeDecodeWithOffsetsErrors sets the optional errors attribute to value. +// +// value: Error handling policy when there is invalid formatting found in the input. +// The value of 'strict' will cause the operation to produce a InvalidArgument +// error on any invalid input formatting. A value of 'replace' (the default) will +// cause the operation to replace any invalid formatting in the input with the +// `replacement_char` codepoint. A value of 'ignore' will cause the operation to +// skip any invalid formatting in the input and produce no corresponding output +// character. +// If not specified, defaults to "replace" +func UnicodeDecodeWithOffsetsErrors(value string) UnicodeDecodeWithOffsetsAttr { + return func(m optionalAttr) { + m["errors"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// SparseReduceSumAttr is an optional argument to SparseReduceSum. -type SparseReduceSumAttr func(optionalAttr) +// UnicodeDecodeWithOffsetsReplacementChar sets the optional replacement_char attribute to value. +// +// value: The replacement character codepoint to be used in place of any invalid +// formatting in the input when `errors='replace'`. Any valid unicode codepoint may +// be used. The default value is the default unicode replacement character is +// 0xFFFD or U+65533.) +// If not specified, defaults to 65533 +func UnicodeDecodeWithOffsetsReplacementChar(value int64) UnicodeDecodeWithOffsetsAttr { + return func(m optionalAttr) { + m["replacement_char"] = value + } +} -// SparseReduceSumKeepDims sets the optional keep_dims attribute to value. +// UnicodeDecodeWithOffsetsReplaceControlCharacters sets the optional replace_control_characters attribute to value. // -// value: If true, retain reduced dimensions with length 1. +// value: Whether to replace the C0 control characters (00-1F) with the +// `replacement_char`. Default is false. 
// If not specified, defaults to false -func SparseReduceSumKeepDims(value bool) SparseReduceSumAttr { +func UnicodeDecodeWithOffsetsReplaceControlCharacters(value bool) UnicodeDecodeWithOffsetsAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["replace_control_characters"] = value } } -// Computes the sum of elements across dimensions of a SparseTensor. +// Decodes each string in `input` into a sequence of Unicode code points. // -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_sum()`. In particular, this Op also returns a dense `Tensor` -// instead of a sparse one. +// The character codepoints for all strings are returned using a single vector +// `char_values`, with strings expanded to characters in row-major order. +// Similarly, the character start byte offsets are returned using a single vector +// `char_to_byte_starts`, with strings expanded in row-major order. // -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. +// The `row_splits` tensor indicates where the codepoints and start offsets for +// each input string begin and end within the `char_values` and +// `char_to_byte_starts` tensors. In particular, the values for the `i`th +// string (in row-major order) are stored in the slice +// `[row_splits[i]:row_splits[i+1]]`. Thus: // -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python. +// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th +// character in the `i`th string (in row-major order). 
+// * `char_to_bytes_starts[row_splits[i]+j]` is the start byte offset for the `j`th +// character in the `i`th string (in row-major order). +// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th +// string (in row-major order). // // Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. +// input: The text to be decoded. Can have any shape. Note that the output is flattened +// to a vector of char values. +// input_encoding: Text encoding of the input strings. This is any of the encodings supported +// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. // -// Returns `R-K`-D. The reduced Tensor. -func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceSumAttr) (output tf.Output) { +// Returns A 1D int32 tensor containing the row splits.A 1D int32 Tensor containing the decoded codepoints.A 1D int32 Tensor containing the byte index in the input string where each +// character in `char_values` starts. 
+func UnicodeDecodeWithOffsets(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeWithOffsetsAttr) (row_splits tf.Output, char_values tf.Output, char_to_byte_starts tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"input_encoding": input_encoding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SparseReduceSum", + Type: "UnicodeDecodeWithOffsets", Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Records the latency of producing `input_dataset` elements in a StatsAggregator. -func ExperimentalLatencyStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns x - y element-wise. +// +// *NOTE*: `Subtract` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ExperimentalLatencyStatsDataset", + Type: "Sub", Input: []tf.Input{ - input_dataset, tag, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// SparseTensorDenseMatMulAttr is an optional argument to SparseTensorDenseMatMul. -type SparseTensorDenseMatMulAttr func(optionalAttr) +// LRNAttr is an optional argument to LRN. +type LRNAttr func(optionalAttr) -// SparseTensorDenseMatMulAdjointA sets the optional adjoint_a attribute to value. +// LRNDepthRadius sets the optional depth_radius attribute to value. // -// value: Use the adjoint of A in the matrix multiply. If A is complex, this -// is transpose(conj(A)). 
Otherwise it's transpose(A). -// If not specified, defaults to false -func SparseTensorDenseMatMulAdjointA(value bool) SparseTensorDenseMatMulAttr { +// value: 0-D. Half-width of the 1-D normalization window. +// If not specified, defaults to 5 +func LRNDepthRadius(value int64) LRNAttr { return func(m optionalAttr) { - m["adjoint_a"] = value + m["depth_radius"] = value } } -// SparseTensorDenseMatMulAdjointB sets the optional adjoint_b attribute to value. +// LRNBias sets the optional bias attribute to value. // -// value: Use the adjoint of B in the matrix multiply. If B is complex, this -// is transpose(conj(B)). Otherwise it's transpose(B). -// If not specified, defaults to false -func SparseTensorDenseMatMulAdjointB(value bool) SparseTensorDenseMatMulAttr { +// value: An offset (usually positive to avoid dividing by 0). +// If not specified, defaults to 1 +func LRNBias(value float32) LRNAttr { return func(m optionalAttr) { - m["adjoint_b"] = value + m["bias"] = value } } -// Multiply SparseTensor (of rank 2) "A" by dense matrix "B". +// LRNAlpha sets the optional alpha attribute to value. // -// No validity checking is performed on the indices of A. However, the following -// input format is recommended for optimal behavior: +// value: A scale factor, usually positive. +// If not specified, defaults to 1 +func LRNAlpha(value float32) LRNAttr { + return func(m optionalAttr) { + m["alpha"] = value + } +} + +// LRNBeta sets the optional beta attribute to value. // -// if adjoint_a == false: -// A should be sorted in lexicographically increasing order. Use SparseReorder -// if you're not sure. -// if adjoint_a == true: -// A should be sorted in order of increasing dimension 1 (i.e., "column major" -// order instead of "row major" order). +// value: An exponent. +// If not specified, defaults to 0.5 +func LRNBeta(value float32) LRNAttr { + return func(m optionalAttr) { + m["beta"] = value + } +} + +// Local Response Normalization. 
+// +// The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last +// dimension), and each vector is normalized independently. Within a given vector, +// each component is divided by the weighted, squared sum of inputs within +// `depth_radius`. In detail, +// +// sqr_sum[a, b, c, d] = +// sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2) +// output = input / (bias + alpha * sqr_sum) ** beta +// +// For details, see [Krizhevsky et al., ImageNet classification with deep +// convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). // // Arguments: -// a_indices: 2-D. The `indices` of the `SparseTensor`, size `[nnz, 2]` Matrix. -// a_values: 1-D. The `values` of the `SparseTensor`, size `[nnz]` Vector. -// a_shape: 1-D. The `shape` of the `SparseTensor`, size `[2]` Vector. -// b: 2-D. A dense Matrix. -func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output, optional ...SparseTensorDenseMatMulAttr) (product tf.Output) { +// input: 4-D. +func LRN(scope *Scope, input tf.Output, optional ...LRNAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -17974,9 +17248,9 @@ func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Outp a(attrs) } opspec := tf.OpSpec{ - Type: "SparseTensorDenseMatMul", + Type: "LRN", Input: []tf.Input{ - a_indices, a_values, a_shape, b, + input, }, Attrs: attrs, } @@ -17984,130 +17258,90 @@ func SparseTensorDenseMatMul(scope *Scope, a_indices tf.Output, a_values tf.Outp return op.Output(0) } -// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp. -type ResourceApplyRMSPropAttr func(optionalAttr) +// RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug. 
+type RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr func(optionalAttr) -// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value. +// RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: If `True`, updating of the var, ms, and mom tensors is protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr { +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["table_id"] = value } } -// Update '*var' according to the RMSProp algorithm. -// -// Note that in dense implementation of this algorithm, ms and mom will -// update even if the grad is zero, but in this sparse implementation, ms -// and mom will not update in iterations during which the grad is zero. -// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) -// -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom -// -// Arguments: -// var_: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. +// RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value. 
+// If not specified, defaults to "" +func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Retrieve proximal Adagrad embedding parameters with debug support. // -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // -// Returns the created operation. -func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) { +// Returns Parameter parameters updated by the proximal Adagrad optimization algorithm.Parameter accumulators updated by the proximal Adagrad optimization algorithm.Parameter gradient_accumulators updated by the proximal Adagrad optimization algorithm. +func RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyRMSProp", - Input: []tf.Input{ - var_, ms, mom, lr, rho, momentum, epsilon, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} + Type: "RetrieveTPUEmbeddingProximalAdagradParametersGradAccumDebug", -// Store the input tensor in the state of the current session. 
-// -// Arguments: -// value: The tensor to be stored. -// -// Returns The handle for the tensor stored in the session state, represented -// as a ResourceHandle object. -func GetSessionHandleV2(scope *Scope, value tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GetSessionHandleV2", - Input: []tf.Input{ - value, - }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// ResourceApplyAdamAttr is an optional argument to ResourceApplyAdam. -type ResourceApplyAdamAttr func(optionalAttr) +// ResourceSparseApplyAdagradAttr is an optional argument to ResourceSparseApplyAdagrad. +type ResourceSparseApplyAdagradAttr func(optionalAttr) -// ResourceApplyAdamUseLocking sets the optional use_locking attribute to value. +// ResourceSparseApplyAdagradUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, updating of the var, m, and v tensors will be protected +// value: If `True`, updating of the var and accum tensors will be protected // by a lock; otherwise the behavior is undefined, but may exhibit less // contention. // If not specified, defaults to false -func ResourceApplyAdamUseLocking(value bool) ResourceApplyAdamAttr { +func ResourceSparseApplyAdagradUseLocking(value bool) ResourceSparseApplyAdagradAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// ResourceApplyAdamUseNesterov sets the optional use_nesterov attribute to value. -// -// value: If `True`, uses the nesterov update. -// If not specified, defaults to false -func ResourceApplyAdamUseNesterov(value bool) ResourceApplyAdamAttr { +// ResourceSparseApplyAdagradUpdateSlots sets the optional update_slots attribute to value. 
+// If not specified, defaults to true +func ResourceSparseApplyAdagradUpdateSlots(value bool) ResourceSparseApplyAdagradAttr { return func(m optionalAttr) { - m["use_nesterov"] = value + m["update_slots"] = value } } -// Update '*var' according to the Adam algorithm. +// Update relevant entries in '*var' and '*accum' according to the adagrad scheme. // -// $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ -// $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ -// $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ -// $$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$ +// That is for rows we have grad for, we update var and accum as follows: +// accum += grad * grad +// var -= lr * grad * (1 / sqrt(accum)) // // Arguments: // var_: Should be from a Variable(). -// m: Should be from a Variable(). -// v: Should be from a Variable(). -// beta1_power: Must be a scalar. -// beta2_power: Must be a scalar. -// lr: Scaling factor. Must be a scalar. -// beta1: Momentum factor. Must be a scalar. -// beta2: Momentum factor. Must be a scalar. -// epsilon: Ridge term. Must be a scalar. +// accum: Should be from a Variable(). +// lr: Learning rate. Must be a scalar. // grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. // // Returns the created operation. 
-func ResourceApplyAdam(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamAttr) (o *tf.Operation) { +func ResourceSparseApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdagradAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -18116,127 +17350,150 @@ func ResourceApplyAdam(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, b a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAdam", + Type: "ResourceSparseApplyAdagrad", Input: []tf.Input{ - var_, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, + var_, accum, lr, grad, indices, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// SerializeManySparseAttr is an optional argument to SerializeManySparse. -type SerializeManySparseAttr func(optionalAttr) +// LoadTPUEmbeddingMomentumParametersAttr is an optional argument to LoadTPUEmbeddingMomentumParameters. +type LoadTPUEmbeddingMomentumParametersAttr func(optionalAttr) -// SerializeManySparseOutType sets the optional out_type attribute to value. +// LoadTPUEmbeddingMomentumParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: The `dtype` to use for serialization; the supported types are `string` -// (default) and `variant`. -// If not specified, defaults to DT_STRING -func SerializeManySparseOutType(value tf.DataType) SerializeManySparseAttr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingMomentumParametersTableId(value int64) LoadTPUEmbeddingMomentumParametersAttr { return func(m optionalAttr) { - m["out_type"] = value + m["table_id"] = value } } -// Serialize an `N`-minibatch `SparseTensor` into an `[N, 3]` `Tensor` object. 
-// -// The `SparseTensor` must have rank `R` greater than 1, and the first dimension -// is treated as the minibatch dimension. Elements of the `SparseTensor` -// must be sorted in increasing order of this first dimension. The serialized -// `SparseTensor` objects going into each row of `serialized_sparse` will have -// rank `R-1`. +// LoadTPUEmbeddingMomentumParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingMomentumParametersTableName(value string) LoadTPUEmbeddingMomentumParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load Momentum embedding parameters. // -// The minibatch size `N` is extracted from `sparse_shape[0]`. +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // // Arguments: -// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. -// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. -func SerializeManySparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeManySparseAttr) (serialized_sparse tf.Output) { +// parameters: Value of parameters used in the Momentum optimization algorithm. +// momenta: Value of momenta used in the Momentum optimization algorithm. +// +// +// +// Returns the created operation. 
+func LoadTPUEmbeddingMomentumParameters(scope *Scope, parameters tf.Output, momenta tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMomentumParametersAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SerializeManySparse", + Type: "LoadTPUEmbeddingMomentumParameters", Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, + parameters, momenta, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes inverse hyperbolic cosine of x element-wise. -func Acosh(scope *Scope, x tf.Output) (y tf.Output) { +// Assigns sparse updates to the variable referenced by `resource`. +// +// This operation computes +// +// # Scalar indices +// ref[indices, ...] = updates[...] +// +// # Vector indices (for each i) +// ref[indices[i], ...] = updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] = updates[i, ..., j, ...] +// +// Arguments: +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. +// +// Returns the created operation. +func ResourceScatterUpdate(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Acosh", + Type: "ResourceScatterUpdate", Input: []tf.Input{ - x, + resource, indices, updates, }, } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// TensorArrayV2Attr is an optional argument to TensorArrayV2. -type TensorArrayV2Attr func(optionalAttr) - -// TensorArrayV2ElementShape sets the optional element_shape attribute to value. 
-// If not specified, defaults to -func TensorArrayV2ElementShape(value tf.Shape) TensorArrayV2Attr { - return func(m optionalAttr) { - m["element_shape"] = value - } -} - -// TensorArrayV2DynamicSize sets the optional dynamic_size attribute to value. -// If not specified, defaults to false -func TensorArrayV2DynamicSize(value bool) TensorArrayV2Attr { - return func(m optionalAttr) { - m["dynamic_size"] = value - } + return scope.AddOperation(opspec) } -// TensorArrayV2ClearAfterRead sets the optional clear_after_read attribute to value. -// If not specified, defaults to true -func TensorArrayV2ClearAfterRead(value bool) TensorArrayV2Attr { - return func(m optionalAttr) { - m["clear_after_read"] = value - } -} +// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth. +type HistogramFixedWidthAttr func(optionalAttr) -// TensorArrayV2TensorArrayName sets the optional tensor_array_name attribute to value. -// If not specified, defaults to "" -func TensorArrayV2TensorArrayName(value string) TensorArrayV2Attr { +// HistogramFixedWidthDtype sets the optional dtype attribute to value. +// If not specified, defaults to DT_INT32 +func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr { return func(m optionalAttr) { - m["tensor_array_name"] = value + m["dtype"] = value } } -// Deprecated. Use TensorArrayV3 +// Return histogram of values. // -// DEPRECATED at GraphDef version 26: Use TensorArrayV3 -func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV2Attr) (handle tf.Output) { +// Given the tensor `values`, this operation returns a rank 1 histogram counting +// the number of entries in `values` that fall into every bin. The bins are +// equal width and determined by the arguments `value_range` and `nbins`. 
+// +// ```python +// # Bins will be: (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf) +// nbins = 5 +// value_range = [0.0, 5.0] +// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15] +// +// with tf.get_default_session() as sess: +// hist = tf.histogram_fixed_width(new_values, value_range, nbins=5) +// variables.global_variables_initializer().run() +// sess.run(hist) => [2, 1, 1, 0, 2] +// ``` +// +// Arguments: +// values: Numeric `Tensor`. +// value_range: Shape [2] `Tensor` of same `dtype` as `values`. +// values <= value_range[0] will be mapped to hist[0], +// values >= value_range[1] will be mapped to hist[-1]. +// nbins: Scalar `int32 Tensor`. Number of histogram bins. +// +// Returns A 1-D `Tensor` holding histogram of values. +func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorArrayV2", + Type: "HistogramFixedWidth", Input: []tf.Input{ - size, + values, value_range, nbins, }, Attrs: attrs, } @@ -18244,118 +17501,58 @@ func TensorArrayV2(scope *Scope, size tf.Output, dtype tf.DataType, optional ... return op.Output(0) } -// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler. -type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr) - -// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value. +// Elementwise computes the bitwise right-shift of `x` and `y`. // -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. 
-// If not specified, defaults to 0 -func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a learned unigram distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. -// -// Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). +// Performs a logical shift for unsigned integer types, and an arithmetic shift +// for signed integer types. // -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. 
If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func ThreadUnsafeUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...ThreadUnsafeUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { +// If `y` is negative, or greater than or equal to than the width of `x` in bits +// the result is implementation defined. +func RightShift(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ThreadUnsafeUnigramCandidateSampler", + Type: "RightShift", Input: []tf.Input{ - true_classes, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// MaxPoolV2Attr is an optional argument to MaxPoolV2. -type MaxPoolV2Attr func(optionalAttr) +// TensorListStackAttr is an optional argument to TensorListStack. +type TensorListStackAttr func(optionalAttr) -// MaxPoolV2DataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolV2DataFormat(value string) MaxPoolV2Attr { +// TensorListStackNumElements sets the optional num_elements attribute to value. 
+// If not specified, defaults to -1 +func TensorListStackNumElements(value int64) TensorListStackAttr { return func(m optionalAttr) { - m["data_format"] = value + m["num_elements"] = value } } -// Performs max pooling on the input. +// Stacks all tensors in the list. // -// Arguments: -// input: 4-D input to pool over. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. +// Requires that all tensors have the same shape. // -// Returns The max pooled output tensor. -func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { +// input_handle: the input list +// tensor: the gathered result +// num_elements: optional. If not -1, the number of elements in the list. +// +func TensorListStack(scope *Scope, input_handle tf.Output, element_shape tf.Output, element_dtype tf.DataType, optional ...TensorListStackAttr) (tensor tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"padding": padding} + attrs := map[string]interface{}{"element_dtype": element_dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPoolV2", + Type: "TensorListStack", Input: []tf.Input{ - input, ksize, strides, + input_handle, element_shape, }, Attrs: attrs, } @@ -18363,33 +17560,64 @@ func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output return op.Output(0) } -// Does nothing. Serves as a control trigger for scheduling. +// A placeholder op for a value that will be fed into the computation. // -// Only useful as a placeholder for control edges. +// Arguments: +// dtype: The type of elements in the tensor. +// shape: The shape of the tensor. // -// Returns the created operation. 
-func ControlTrigger(scope *Scope) (o *tf.Operation) { +// Returns A tensor that will be provided using the infeed mechanism. +func InfeedDequeue(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype, "shape": shape} opspec := tf.OpSpec{ - Type: "ControlTrigger", + Type: "InfeedDequeue", + + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Deprecated. Use TensorArrayReadV3 +// StatelessRandomUniformAttr is an optional argument to StatelessRandomUniform. +type StatelessRandomUniformAttr func(optionalAttr) + +// StatelessRandomUniformDtype sets the optional dtype attribute to value. // -// DEPRECATED at GraphDef version 26: Use TensorArrayReadV3 -func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessRandomUniformDtype(value tf.DataType) StatelessRandomUniformAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs deterministic pseudorandom random values from a uniform distribution. +// +// The generated values follow a uniform distribution in the range `[0, 1)`. The +// lower bound 0 is included in the range, while the upper bound 1 is excluded. +// +// The outputs are a deterministic function of `shape` and `seed`. +// +// Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// +// Returns Random values with specified shape. 
+func StatelessRandomUniform(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomUniformAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TensorArrayReadV2", + Type: "StatelessRandomUniform", Input: []tf.Input{ - handle, index, flow_in, + shape, seed, }, Attrs: attrs, } @@ -18397,206 +17625,180 @@ func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in return op.Output(0) } -// Batch normalization. -// -// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() -// -// This op is deprecated. Prefer `tf.nn.batch_normalization`. +// Makes its input available to the next iteration. // // Arguments: -// t: A 4D input Tensor. -// m: A 1D mean Tensor with size matching the last dimension of t. -// This is the first output from tf.nn.moments, -// or a saved moving average thereof. -// v: A 1D variance Tensor with size matching the last dimension of t. -// This is the second output from tf.nn.moments, -// or a saved moving average thereof. -// beta: A 1D beta Tensor with size matching the last dimension of t. -// An offset to be added to the normalized tensor. -// gamma: A 1D gamma Tensor with size matching the last dimension of t. -// If "scale_after_normalization" is true, this tensor will be multiplied -// with the normalized tensor. -// variance_epsilon: A small float number to avoid dividing by 0. -// scale_after_normalization: A bool indicating whether the resulted tensor -// needs to be multiplied with gamma. -func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { +// data: The tensor to be made available to the next iteration. +// +// Returns The same tensor as `data`. 
+func NextIteration(scope *Scope, data tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} opspec := tf.OpSpec{ - Type: "BatchNormWithGlobalNormalization", + Type: "NextIteration", Input: []tf.Input{ - t, m, v, beta, gamma, + data, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// AddManySparseToTensorsMapAttr is an optional argument to AddManySparseToTensorsMap. -type AddManySparseToTensorsMapAttr func(optionalAttr) - -// AddManySparseToTensorsMapContainer sets the optional container attribute to value. -// -// value: The container name for the `SparseTensorsMap` created by this op. -// If not specified, defaults to "" -func AddManySparseToTensorsMapContainer(value string) AddManySparseToTensorsMapAttr { - return func(m optionalAttr) { - m["container"] = value +// Output a fact about factorials. +func Fact(scope *Scope) (fact tf.Output) { + if scope.Err() != nil { + return } + opspec := tf.OpSpec{ + Type: "Fact", + } + op := scope.AddOperation(opspec) + return op.Output(0) } -// AddManySparseToTensorsMapSharedName sets the optional shared_name attribute to value. +// GenerateVocabRemappingAttr is an optional argument to GenerateVocabRemapping. +type GenerateVocabRemappingAttr func(optionalAttr) + +// GenerateVocabRemappingOldVocabSize sets the optional old_vocab_size attribute to value. // -// value: The shared name for the `SparseTensorsMap` created by this op. -// If blank, the new Operation's unique name is used. -// If not specified, defaults to "" -func AddManySparseToTensorsMapSharedName(value string) AddManySparseToTensorsMapAttr { +// value: Number of entries in the old vocab file to consider. If -1, +// use the entire old vocabulary. 
+// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func GenerateVocabRemappingOldVocabSize(value int64) GenerateVocabRemappingAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["old_vocab_size"] = value } } -// Add an `N`-minibatch `SparseTensor` to a `SparseTensorsMap`, return `N` handles. +// Given a path to new and old vocabulary files, returns a remapping Tensor of // -// A `SparseTensor` of rank `R` is represented by three tensors: `sparse_indices`, -// `sparse_values`, and `sparse_shape`, where +// length `num_new_vocab`, where `remapping[i]` contains the row number in the old +// vocabulary that corresponds to row `i` in the new vocabulary (starting at line +// `new_vocab_offset` and up to `num_new_vocab` entities), or `-1` if entry `i` +// in the new vocabulary is not in the old vocabulary. The old vocabulary is +// constrained to the first `old_vocab_size` entries if `old_vocab_size` is not the +// default value of -1. // -// ```sparse_indices.shape[1] == sparse_shape.shape[0] == R``` +// `num_vocab_offset` enables +// use in the partitioned variable case, and should generally be set through +// examining partitioning info. The format of the files should be a text file, +// with each line containing a single entity within the vocabulary. // -// An `N`-minibatch of `SparseTensor` objects is represented as a `SparseTensor` -// having a first `sparse_indices` column taking values between `[0, N)`, where -// the minibatch size `N == sparse_shape[0]`. +// For example, with `new_vocab_file` a text file containing each of the following +// elements on a single line: `[f0, f1, f2, f3]`, old_vocab_file = [f1, f0, f3], +// `num_new_vocab = 3, new_vocab_offset = 1`, the returned remapping would be +// `[0, -1, 2]`. // -// The input `SparseTensor` must have rank `R` greater than 1, and the first -// dimension is treated as the minibatch dimension. 
Elements of the `SparseTensor` -// must be sorted in increasing order of this first dimension. The stored -// `SparseTensor` objects pointed to by each row of the output `sparse_handles` -// will have rank `R-1`. +// The op also returns a count of how many entries in the new vocabulary +// were present in the old vocabulary, which is used to calculate the number of +// values to initialize in a weight matrix remapping // -// The `SparseTensor` values can then be read out as part of a minibatch by passing -// the given keys as vector elements to `TakeManySparseFromTensorsMap`. To ensure -// the correct `SparseTensorsMap` is accessed, ensure that the same -// `container` and `shared_name` are passed to that Op. If no `shared_name` -// is provided here, instead use the *name* of the Operation created by calling -// `AddManySparseToTensorsMap` as the `shared_name` passed to -// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. +// This functionality can be used to remap both row vocabularies (typically, +// features) and column vocabularies (typically, classes) from TensorFlow +// checkpoints. Note that the partitioning logic relies on contiguous vocabularies +// corresponding to div-partitioned variables. Moreover, the underlying remapping +// uses an IndexTable (as opposed to an inexact CuckooTable), so client code should +// use the corresponding index_table_from_file() as the FeatureColumn framework +// does (as opposed to tf.feature_to_id(), which uses a CuckooTable). // // Arguments: -// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. -// `sparse_indices[:, 0]` must be ordered values in `[0, N)`. -// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. -// The minibatch size `N == sparse_shape[0]`. +// new_vocab_file: Path to the new vocab file. +// old_vocab_file: Path to the old vocab file. 
+// new_vocab_offset: How many entries into the new vocab file to start reading. +// num_new_vocab: Number of entries in the new vocab file to remap. // -// Returns 1-D. The handles of the `SparseTensor` now stored in the -// `SparseTensorsMap`. Shape: `[N]`. -func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddManySparseToTensorsMapAttr) (sparse_handles tf.Output) { +// Returns A Tensor of length num_new_vocab where the element at index i +// is equal to the old ID that maps to the new ID i. This element is -1 for any +// new ID that is not found in the old vocabulary.Number of new vocab entries found in old vocab. +func GenerateVocabRemapping(scope *Scope, new_vocab_file tf.Output, old_vocab_file tf.Output, new_vocab_offset int64, num_new_vocab int64, optional ...GenerateVocabRemappingAttr) (remapping tf.Output, num_present tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"new_vocab_offset": new_vocab_offset, "num_new_vocab": num_new_vocab} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AddManySparseToTensorsMap", + Type: "GenerateVocabRemapping", Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, + new_vocab_file, old_vocab_file, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Concatenates tensors along one dimension. +// Worker heartbeat op. +// +// Heartbeats may be sent periodically to indicate the coordinator is still active, +// to retrieve the current worker status and to expedite shutdown when necessary. // // Arguments: -// values: List of `N` Tensors to concatenate. Their ranks and types must match, -// and their sizes must match in all dimensions except `concat_dim`. -// axis: 0-D. The dimension along which to concatenate. Must be in the -// range [-rank(values), rank(values)). 
+// request: A string tensor containing a serialized WorkerHeartbeatRequest // -// Returns A `Tensor` with the concatenation of values stacked along the -// `concat_dim` dimension. This tensor's shape matches that of `values` except -// in `concat_dim` where it has the sum of the sizes. -func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { +// Returns A string tensor containing a serialized WorkerHeartbeatResponse +func WorkerHeartbeat(scope *Scope, request tf.Output) (response tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ConcatV2", + Type: "WorkerHeartbeat", Input: []tf.Input{ - tf.OutputList(values), axis, + request, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Reads and outputs the entire contents of the input filename. -func ReadFile(scope *Scope, filename tf.Output) (contents tf.Output) { +// Returns the truth value of (x <= y) element-wise. +// +// *NOTE*: `LessEqual` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func LessEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ReadFile", + Type: "LessEqual", Input: []tf.Input{ - filename, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns immutable tensor from memory region. -// -// The current implementation memmaps the tensor from a file. -// -// Arguments: -// dtype: Type of the returned tensor. -// shape: Shape of the returned tensor. -// memory_region_name: Name of readonly memory region used by the tensor, see -// NewReadOnlyMemoryRegionFromFile in tensorflow::Env. 
-func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name} - opspec := tf.OpSpec{ - Type: "ImmutableConst", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StringJoinAttr is an optional argument to StringJoin. -type StringJoinAttr func(optionalAttr) +// EnqueueTPUEmbeddingIntegerBatchAttr is an optional argument to EnqueueTPUEmbeddingIntegerBatch. +type EnqueueTPUEmbeddingIntegerBatchAttr func(optionalAttr) -// StringJoinSeparator sets the optional separator attribute to value. +// EnqueueTPUEmbeddingIntegerBatchDeviceOrdinal sets the optional device_ordinal attribute to value. // -// value: string, an optional join separator. -// If not specified, defaults to "" -func StringJoinSeparator(value string) StringJoinAttr { +// value: The TPU device to use. Should be >= 0 and less than the number +// of TPU cores in the task on which the node is placed. +// If not specified, defaults to -1 +func EnqueueTPUEmbeddingIntegerBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingIntegerBatchAttr { return func(m optionalAttr) { - m["separator"] = value + m["device_ordinal"] = value } } -// Joins the strings in the given list of string tensors into one tensor; -// -// with the given separator (default is an empty separator). +// An op that enqueues a list of input batch tensors to TPUEmbedding. // // Arguments: -// inputs: A list of string tensors. The tensors must all have the same shape, -// or be scalars. Scalars may be mixed in; these will be broadcast to the shape -// of non-scalar inputs. -func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) { +// batch: A list of 1D tensors, one for each embedding table, containing the +// indices into the tables. 
+// mode_override: A string input that overrides the mode specified in the +// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', +// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set +// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. +// +// Returns the created operation. +func EnqueueTPUEmbeddingIntegerBatch(scope *Scope, batch []tf.Output, mode_override tf.Output, optional ...EnqueueTPUEmbeddingIntegerBatchAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -18605,100 +17807,185 @@ func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (o a(attrs) } opspec := tf.OpSpec{ - Type: "StringJoin", + Type: "EnqueueTPUEmbeddingIntegerBatch", Input: []tf.Input{ - tf.OutputList(inputs), + tf.OutputList(batch), mode_override, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Creates and returns an empty tensor list. +// An op that receives embedding activations on the TPU. // -// All list elements must be tensors of dtype element_dtype and shape compatible -// with element_shape. +// The TPU system performs the embedding lookups and aggregations specified by +// the arguments to TPUEmbeddingEnqueue(Integer/Sparse/SparseTensor)Batch. The +// results of these aggregations are visible to the Tensorflow Graph as the +// outputs of a RecvTPUEmbeddingActivations op. This op returns a list containing +// one Tensor of activations per table specified in the model. There can be at +// most one RecvTPUEmbeddingActivations op in the TPU graph. // -// handle: an empty tensor list. -// element_dtype: the type of elements in the list. -// element_shape: a shape compatible with that of elements in the list. 
-func EmptyTensorList(scope *Scope, element_shape tf.Output, max_num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) { +// Arguments: +// num_outputs: The number of output activation tensors, equal to the number of +// embedding tables in the model. +// config: Serialized TPUEmbeddingConfiguration proto. +// +// Returns A TensorList of embedding activations containing one Tensor per +// embedding table in the model. +func RecvTPUEmbeddingActivations(scope *Scope, num_outputs int64, config string) (outputs []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"element_dtype": element_dtype} + attrs := map[string]interface{}{"num_outputs": num_outputs, "config": config} opspec := tf.OpSpec{ - Type: "EmptyTensorList", - Input: []tf.Input{ - element_shape, max_num_elements, - }, + Type: "RecvTPUEmbeddingActivations", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("RecvTPUEmbeddingActivations", err) + return + } + return outputs } -// Computes softsign gradients for a softsign operation. +// Selects elements from `x` or `y`, depending on `condition`. +// +// The `x`, and `y` tensors must all have the same shape, and the +// output will also have that shape. +// +// The `condition` tensor must be a scalar if `x` and `y` are scalars. +// If `x` and `y` are vectors or higher rank, then `condition` must be either a +// scalar, a vector with size matching the first dimension of `x`, or must have +// the same shape as `x`. +// +// The `condition` tensor acts as a mask that chooses, based on the value at each +// element, whether the corresponding element / row in the output should be +// taken from `x` (if true) or `y` (if false). 
+// +// If `condition` is a vector and `x` and `y` are higher rank matrices, then +// it chooses which row (outer dimension) to copy from `x` and `y`. +// If `condition` has the same shape as `x` and `y`, then it chooses which +// element to copy from `x` and `y`. +// +// For example: +// +// ```python +// # 'condition' tensor is [[True, False] +// # [False, True]] +// # 't' is [[1, 2], +// # [3, 4]] +// # 'e' is [[5, 6], +// # [7, 8]] +// select(condition, t, e) # => [[1, 6], [7, 4]] +// +// +// # 'condition' tensor is [True, False] +// # 't' is [[1, 2], +// # [3, 4]] +// # 'e' is [[5, 6], +// # [7, 8]] +// select(condition, t, e) ==> [[1, 2], +// [7, 8]] +// +// ``` // // Arguments: -// gradients: The backpropagated gradients to the corresponding softsign operation. -// features: The features passed as input to the corresponding softsign operation. // -// Returns The gradients: `gradients / (1 + abs(features)) ** 2`. -func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { +// x: = A `Tensor` which may have the same shape as `condition`. +// If `condition` is rank 1, `x` may have higher rank, +// but its first dimension must match the size of `condition`. +// y: = A `Tensor` with the same type and shape as `x`. +// +// Returns = A `Tensor` with the same type and shape as `x` and `y`. +func Select(scope *Scope, condition tf.Output, x tf.Output, y tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SoftsignGrad", + Type: "Select", Input: []tf.Input{ - gradients, features, + condition, x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Provides the time since epoch in seconds. +// Returns the set of files matching one or more glob patterns. // -// Returns the timestamp as a `float64` for seconds since the Unix epoch. +// Note that this routine only supports wildcard characters in the +// basename portion of the pattern, not in the directory portion. 
+// Note also that the order of filenames returned can be non-deterministic. // -// Note: the timestamp is computed when the op is executed, not when it is added -// to the graph. -func Timestamp(scope *Scope) (ts tf.Output) { +// Arguments: +// pattern: Shell wildcard pattern(s). Scalar or vector of type string. +// +// Returns A vector of matching filenames. +func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Timestamp", + Type: "MatchingFiles", + Input: []tf.Input{ + pattern, + }, } op := scope.AddOperation(opspec) return op.Output(0) } -// VariableShapeAttr is an optional argument to VariableShape. -type VariableShapeAttr func(optionalAttr) +// SqueezeAttr is an optional argument to Squeeze. +type SqueezeAttr func(optionalAttr) -// VariableShapeOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func VariableShapeOutType(value tf.DataType) VariableShapeAttr { +// SqueezeAxis sets the optional axis attribute to value. +// +// value: If specified, only squeezes the dimensions listed. The dimension +// index starts at 0. It is an error to squeeze a dimension that is not 1. Must +// be in the range `[-rank(input), rank(input))`. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func SqueezeAxis(value []int64) SqueezeAttr { return func(m optionalAttr) { - m["out_type"] = value + m["squeeze_dims"] = value } } -// Returns the shape of the variable pointed to by `resource`. +// Removes dimensions of size 1 from the shape of a tensor. // -// This operation returns a 1-D integer tensor representing the shape of `input`. +// Given a tensor `input`, this operation returns a tensor of the same type with +// all dimensions of size 1 removed. If you don't want to remove all size 1 +// dimensions, you can remove specific size 1 dimensions by specifying +// `axis`. 
// // For example: // // ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// shape(t) ==> [2, 2, 3] +// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1] +// shape(squeeze(t)) ==> [2, 3] // ``` -func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) { +// +// Or, to remove specific size 1 dimensions: +// +// ``` +// # 't' is a tensor of shape [1, 2, 1, 3, 1, 1] +// shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1] +// ``` +// +// Arguments: +// input: The `input` to squeeze. +// +// Returns Contains the same data as `input`, but has one or more dimensions of +// size 1 removed. +func Squeeze(scope *Scope, input tf.Output, optional ...SqueezeAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -18707,7 +17994,7 @@ func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "VariableShape", + Type: "Squeeze", Input: []tf.Input{ input, }, @@ -18717,439 +18004,400 @@ func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) return op.Output(0) } -// AvgPoolGradAttr is an optional argument to AvgPoolGrad. -type AvgPoolGradAttr func(optionalAttr) +// ResourceApplyAdadeltaAttr is an optional argument to ResourceApplyAdadelta. +type ResourceApplyAdadeltaAttr func(optionalAttr) -// AvgPoolGradDataFormat sets the optional data_format attribute to value. +// ResourceApplyAdadeltaUseLocking sets the optional use_locking attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. 
-// If not specified, defaults to "NHWC" -func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } +// value: If True, updating of the var, accum and update_accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceApplyAdadeltaUseLocking(value bool) ResourceApplyAdadeltaAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } } -// Computes gradients of the average pooling function. +// Update '*var' according to the adadelta scheme. +// +// accum = rho() * accum + (1 - rho()) * grad.square(); +// update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad; +// update_accum = rho() * update_accum + (1 - rho()) * update.square(); +// var -= update; // // Arguments: -// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. -// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. -// the output of `avg_pool`. -// ksize: The size of the sliding window for each dimension of the input. -// strides: The stride of the sliding window for each dimension of the input. -// padding: The type of padding algorithm to use. +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// accum_update: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// rho: Decay factor. Must be a scalar. +// epsilon: Constant factor. Must be a scalar. +// grad: The gradient. // -// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. -func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { +// Returns the created operation. 
+func ResourceApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdadeltaAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AvgPoolGrad", + Type: "ResourceApplyAdadelta", Input: []tf.Input{ - orig_input_shape, grad, + var_, accum, accum_update, lr, rho, epsilon, grad, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Greedily selects a subset of bounding boxes in descending order of score, +// NonMaxSuppressionAttr is an optional argument to NonMaxSuppression. +type NonMaxSuppressionAttr func(optionalAttr) + +// NonMaxSuppressionIouThreshold sets the optional iou_threshold attribute to value. // -// pruning away boxes that have high overlaps -// with previously selected boxes. Bounding boxes with score less than -// `score_threshold` are removed. N-by-n overlap values are supplied as square matrix, -// which allows for defining a custom overlap criterium (eg. intersection over union, -// intersection over area, etc.). +// value: A float representing the threshold for deciding whether boxes +// overlap too much with respect to IOU. +// If not specified, defaults to 0.5 +func NonMaxSuppressionIouThreshold(value float32) NonMaxSuppressionAttr { + return func(m optionalAttr) { + m["iou_threshold"] = value + } +} + +// Greedily selects a subset of bounding boxes in descending order of score, // +// pruning away boxes that have high intersection-over-union (IOU) overlap +// with previously selected boxes. 
Bounding boxes are supplied as +// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any +// diagonal pair of box corners and the coordinates can be provided as normalized +// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm +// is agnostic to where the origin is in the coordinate system. Note that this +// algorithm is invariant to orthogonal transformations and translations +// of the coordinate system; thus translating or reflections of the coordinate +// system result in the same boxes being selected by the algorithm. // The output of this operation is a set of integers indexing into the input // collection of bounding boxes representing the selected boxes. The bounding // box coordinates corresponding to the selected indices can then be obtained // using the `tf.gather operation`. For example: -// -// selected_indices = tf.image.non_max_suppression_with_overlaps( -// overlaps, scores, max_output_size, overlap_threshold, score_threshold) +// selected_indices = tf.image.non_max_suppression( +// boxes, scores, max_output_size, iou_threshold) // selected_boxes = tf.gather(boxes, selected_indices) // // Arguments: -// overlaps: A 2-D float tensor of shape `[num_boxes, num_boxes]` representing -// the n-by-n box overlap values. +// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. // scores: A 1-D float tensor of shape `[num_boxes]` representing a single // score corresponding to each box (each row of boxes). // max_output_size: A scalar integer tensor representing the maximum number of // boxes to be selected by non max suppression. -// overlap_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too. -// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove -// boxes based on score. // // Returns A 1-D integer tensor of shape `[M]` representing the selected // indices from the boxes tensor, where `M <= max_output_size`. 
-func NonMaxSuppressionWithOverlaps(scope *Scope, overlaps tf.Output, scores tf.Output, max_output_size tf.Output, overlap_threshold tf.Output, score_threshold tf.Output) (selected_indices tf.Output) { +func NonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, optional ...NonMaxSuppressionAttr) (selected_indices tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "NonMaxSuppressionWithOverlaps", + Type: "NonMaxSuppression", Input: []tf.Input{ - overlaps, scores, max_output_size, overlap_threshold, score_threshold, + boxes, scores, max_output_size, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes softmax cross entropy cost and gradients to backpropagate. -// -// Unlike `SoftmaxCrossEntropyWithLogits`, this operation does not accept -// a matrix of label probabilities, but rather a single label per row -// of features. This label is considered to have probability 1.0 for the -// given row. -// -// Inputs are the logits, not probabilities. -// -// Arguments: -// features: batch_size x num_classes matrix -// labels: batch_size vector with values in [0, num_classes). -// This is the label for the given minibatch entry. -// -// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix). -func SparseSoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { +// Creates a dataset that emits `components` as a tuple of tensors once. 
+func TensorDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "SparseSoftmaxCrossEntropyWithLogits", + Type: "TensorDataset", Input: []tf.Input{ - features, labels, + tf.OutputList(components), }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Returns the truth value of NOT x element-wise. -func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) { +// VariableShapeAttr is an optional argument to VariableShape. +type VariableShapeAttr func(optionalAttr) + +// VariableShapeOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_INT32 +func VariableShapeOutType(value tf.DataType) VariableShapeAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Returns the shape of the variable pointed to by `resource`. +// +// This operation returns a 1-D integer tensor representing the shape of `input`. +// +// For example: +// +// ``` +// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] +// shape(t) ==> [2, 2, 3] +// ``` +func VariableShape(scope *Scope, input tf.Output, optional ...VariableShapeAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "LogicalNot", + Type: "VariableShape", Input: []tf.Input{ - x, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// 3D real-valued fast Fourier transform. -// -// Computes the 3-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most 3 dimensions of `input`. 
-// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the -// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension -// of `output`: the zero-frequency term, followed by the `fft_length / 2` -// positive-frequency terms. -// -// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// Store the input tensor in the state of the current session. // // Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. -// -// Returns A complex64 tensor of the same rank as `input`. The inner-most 3 -// dimensions of `input` are replaced with the their 3D Fourier transform. The -// inner-most dimension contains `fft_length / 2 + 1` unique frequency -// components. +// value: The tensor to be stored. // -// @compatibility(numpy) -// Equivalent to np.fft.rfftn with 3 dimensions. -// @end_compatibility -func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// Returns The handle for the tensor stored in the session state, represented +// as a ResourceHandle object. +func GetSessionHandleV2(scope *Scope, value tf.Output) (handle tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "RFFT3D", + Type: "GetSessionHandleV2", Input: []tf.Input{ - input, fft_length, + value, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// TensorArrayV3Attr is an optional argument to TensorArrayV3. -type TensorArrayV3Attr func(optionalAttr) - -// TensorArrayV3ElementShape sets the optional element_shape attribute to value. -// -// value: The expected shape of an element, if known. Used to -// validate the shapes of TensorArray elements. If this shape is not -// fully specified, gathering zero-size TensorArrays is an error. 
-// If not specified, defaults to -func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr { - return func(m optionalAttr) { - m["element_shape"] = value - } -} +// ResourceApplyAdamAttr is an optional argument to ResourceApplyAdam. +type ResourceApplyAdamAttr func(optionalAttr) -// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value. +// ResourceApplyAdamUseLocking sets the optional use_locking attribute to value. // -// value: A boolean that determines whether writes to the TensorArray -// are allowed to grow the size. By default, this is not allowed. +// value: If `True`, updating of the var, m, and v tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. // If not specified, defaults to false -func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr { - return func(m optionalAttr) { - m["dynamic_size"] = value - } -} - -// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value. -// -// value: If true (default), Tensors in the TensorArray are cleared -// after being read. This disables multiple read semantics but allows early -// release of memory. -// If not specified, defaults to true -func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr { +func ResourceApplyAdamUseLocking(value bool) ResourceApplyAdamAttr { return func(m optionalAttr) { - m["clear_after_read"] = value + m["use_locking"] = value } } -// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value. +// ResourceApplyAdamUseNesterov sets the optional use_nesterov attribute to value. // -// value: If true (default is false), then all -// elements in the TensorArray will be expected to have have identical shapes. 
-// This allows certain behaviors, like dynamically checking for -// consistent shapes on write, and being able to fill in properly -// shaped zero tensors on stack -- even if the element_shape attribute -// is not fully defined. +// value: If `True`, uses the nesterov update. // If not specified, defaults to false -func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr { - return func(m optionalAttr) { - m["identical_element_shapes"] = value - } -} - -// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value. -// -// value: Overrides the name used for the temporary tensor_array -// resource. Default value is the name of the 'TensorArray' op (which -// is guaranteed unique). -// If not specified, defaults to "" -func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr { +func ResourceApplyAdamUseNesterov(value bool) ResourceApplyAdamAttr { return func(m optionalAttr) { - m["tensor_array_name"] = value + m["use_nesterov"] = value } } -// An array of Tensors of given size. +// Update '*var' according to the Adam algorithm. // -// Write data via Write and read via Read or Pack. +// $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ +// $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ +// $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ +// $$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$ // // Arguments: -// size: The size of the array. -// dtype: The type of the elements on the tensor_array. +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// v: Should be from a Variable(). +// beta1_power: Must be a scalar. +// beta2_power: Must be a scalar. +// lr: Scaling factor. Must be a scalar. +// beta1: Momentum factor. Must be a scalar. +// beta2: Momentum factor. Must be a scalar. +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. // -// Returns The handle to the TensorArray.A scalar used to control gradient flow. 
-func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) { +// Returns the created operation. +func ResourceApplyAdam(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorArrayV3", + Type: "ResourceApplyAdam", Input: []tf.Input{ - size, + var_, m, v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return scope.AddOperation(opspec) } -// Runs multiple additive regression ensemble predictors on input instances and -// -// computes the logits. It is designed to be used during prediction. -// It traverses all the trees and calculates the final score for each instance. -// -// Arguments: -// -// bucketized_features: A list of rank 1 Tensors containing bucket id for each -// feature. -// logits_dimension: scalar, dimension of the logits, to be used for partial logits -// shape. -// -// Returns Output rank 2 Tensor containing logits for each example. -func BoostedTreesPredict(scope *Scope, tree_ensemble_handle tf.Output, bucketized_features []tf.Output, logits_dimension int64) (logits tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"logits_dimension": logits_dimension} - opspec := tf.OpSpec{ - Type: "BoostedTreesPredict", - Input: []tf.Input{ - tree_ensemble_handle, tf.OutputList(bucketized_features), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} +// SdcaOptimizerAttr is an optional argument to SdcaOptimizer. 
+type SdcaOptimizerAttr func(optionalAttr) -// Elementwise computes the bitwise OR of `x` and `y`. +// SdcaOptimizerAdaptative sets the optional adaptative attribute to value. // -// The result will have those bits set, that are set in `x`, `y` or both. The -// computation is performed on the underlying representations of `x` and `y`. -func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BitwiseOr", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. -type MatrixSolveLsAttr func(optionalAttr) - -// MatrixSolveLsFast sets the optional fast attribute to value. +// value: Whether to use Adaptive SDCA for the inner loop. // If not specified, defaults to true -func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { +func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr { return func(m optionalAttr) { - m["fast"] = value + m["adaptative"] = value } } -// Solves one or more linear least-squares problems. +// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for // -// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same -// type as `matrix` and shape `[..., M, K]`. -// The output is a tensor shape `[..., N, K]` where each output matrix solves -// each of the equations -// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` -// in the least squares sense. +// linear models with L1 + L2 regularization. As global optimization objective is +// strongly-convex, the optimizer optimizes the dual objective at each step. The +// optimizer applies each update one example at a time. Examples are sampled +// uniformly, and the optimizer is learning rate free and enjoys linear convergence +// rate. 
// -// We use the following notation for (complex) matrix and right-hand sides -// in the batch: +// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).
+// Shai Shalev-Shwartz, Tong Zhang. 2012 // -// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), -// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), -// `output`=\\(X \in \mathbb{C}^{n \times k}\\), -// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). +// $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$ // -// If `fast` is `True`, then the solution is computed by solving the normal -// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then -// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares -// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). -// If \\(m \lt n\\) then `output` is computed as -// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the -// minimum-norm solution to the under-determined linear system, i.e. -// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), -// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable -// when \\(A\\) is numerically full rank and has a condition number -// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is -// sufficiently large. +// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).
+// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan, +// Peter Richtarik, Martin Takac. 2015 // -// If `fast` is `False` an algorithm based on the numerically robust complete -// orthogonal decomposition is used. This computes the minimum-norm -// least-squares solution, even when \\(A\\) is rank deficient. This path is -// typically 6-7 times slower than the fast path. If `fast` is `False` then -// `l2_regularizer` is ignored. +// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).
+// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015 // // Arguments: -// matrix: Shape is `[..., M, N]`. -// rhs: Shape is `[..., M, K]`. -// l2_regularizer: Scalar tensor. -// -// @compatibility(numpy) -// Equivalent to np.linalg.lstsq -// @end_compatibility +// sparse_example_indices: a list of vectors which contain example indices. +// sparse_feature_indices: a list of vectors which contain feature indices. +// sparse_feature_values: a list of vectors which contains feature value +// associated with each feature group. +// dense_features: a list of matrices which contains the dense feature values. +// example_weights: a vector which contains the weight associated with each +// example. +// example_labels: a vector which contains the label/target associated with each +// example. +// sparse_indices: a list of vectors where each value is the indices which has +// corresponding weights in sparse_weights. This field maybe omitted for the +// dense approach. +// sparse_weights: a list of vectors where each value is the weight associated with +// a sparse feature group. +// dense_weights: a list of vectors where the values are the weights associated +// with a dense feature group. +// example_state_data: a list of vectors containing the example state data. +// loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, +// squared and hinge losses. +// l1: Symmetric l1 regularization strength. +// l2: Symmetric l2 regularization strength. +// num_loss_partitions: Number of partitions of the global loss function. +// num_inner_iterations: Number of iterations per mini-batch. // -// Returns Shape is `[..., N, K]`. 
-func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { +// Returns a list of vectors containing the updated example state +// data.a list of vectors where each value is the delta +// weights associated with a sparse feature group.a list of vectors where the values are the delta +// weights associated with a dense feature group. +func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MatrixSolveLs", + Type: "SdcaOptimizer", Input: []tf.Input{ - matrix, rhs, l2_regularizer, + tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + out_example_state_data = op.Output(idx) + if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil { + 
scope.UpdateErr("SdcaOptimizer", err) + return + } + if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil { + scope.UpdateErr("SdcaOptimizer", err) + return + } + return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights } -// MaxPool3DAttr is an optional argument to MaxPool3D. -type MaxPool3DAttr func(optionalAttr) +// ExperimentalParseExampleDatasetAttr is an optional argument to ExperimentalParseExampleDataset. +type ExperimentalParseExampleDatasetAttr func(optionalAttr) -// MaxPool3DDataFormat sets the optional data_format attribute to value. -// -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DDataFormat(value string) MaxPool3DAttr { +// ExperimentalParseExampleDatasetSloppy sets the optional sloppy attribute to value. +// If not specified, defaults to false +func ExperimentalParseExampleDatasetSloppy(value bool) ExperimentalParseExampleDatasetAttr { return func(m optionalAttr) { - m["data_format"] = value + m["sloppy"] = value } } -// Performs 3D max pooling on the input. +// Transforms `input_dataset` containing `Example` protos as vectors of DT_STRING into a dataset of `Tensor` or `SparseTensor` objects representing the parsed features. // // Arguments: -// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. 
// -// Returns The max pooled output tensor. -func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) { +// +// dense_defaults: A dict mapping string keys to `Tensor`s. +// The keys of the dict must match the dense_keys of the feature. +// sparse_keys: A list of string keys in the examples features. +// The results for these keys will be returned as `SparseTensor` objects. +// dense_keys: A list of Ndense string Tensors (scalars). +// The keys expected in the Examples features associated with dense values. +// sparse_types: A list of `DTypes` of the same length as `sparse_keys`. +// Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), +// and `tf.string` (`BytesList`) are supported. +// dense_shapes: List of tuples with the same length as `dense_keys`. +// The shape of the data for each dense feature referenced by `dense_keys`. +// Required for any input tensors identified by `dense_keys`. Must be +// either fully defined, or may contain an unknown first dimension. +// An unknown first dimension means the feature is treated as having +// a variable number of blocks, and the output shape along this dimension +// is considered unknown at graph build time. Padding is applied for +// minibatch elements smaller than the maximum number of blocks for the +// given feature along this dimension. +// output_types: The type list for the return values. +// output_shapes: The list of shapes being produced. 
+func ExperimentalParseExampleDataset(scope *Scope, input_dataset tf.Output, num_parallel_calls tf.Output, dense_defaults []tf.Output, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ExperimentalParseExampleDatasetAttr) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{"sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes, "output_types": output_types, "output_shapes": output_shapes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPool3D", + Type: "ExperimentalParseExampleDataset", Input: []tf.Input{ - input, + input_dataset, num_parallel_calls, tf.OutputList(dense_defaults), }, Attrs: attrs, } @@ -19157,193 +18405,280 @@ func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, pa return op.Output(0) } -// Conv3DBackpropInputAttr is an optional argument to Conv3DBackpropInput. -type Conv3DBackpropInputAttr func(optionalAttr) - -// Conv3DBackpropInputDilations sets the optional dilations attribute to value. -// If not specified, defaults to -func Conv3DBackpropInputDilations(value []int64) Conv3DBackpropInputAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of 3-D convolution with respect to the input. -// -// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2 +// 2D real-valued fast Fourier transform. +// +// Computes the 2-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most 2 dimensions of `input`. 
+// +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT2D` only returns the +// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension +// of `output`: the zero-frequency term, followed by the `fft_length / 2` +// positive-frequency terms. +// +// Along each axis `RFFT2D` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. // // Arguments: -// input: Shape `[batch, depth, rows, cols, in_channels]`. -// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. -// `in_channels` must match between `input` and `filter`. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropInputAttr) (output tf.Output) { +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. +// +// Returns A complex64 tensor of the same rank as `input`. The inner-most 2 +// dimensions of `input` are replaced with their 2D Fourier transform. The +// inner-most dimension contains `fft_length / 2 + 1` unique frequency +// components. 
+// +// @compatibility(numpy) +// Equivalent to np.fft.rfft2 +// @end_compatibility +func RFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Conv3DBackpropInput", + Type: "RFFT2D", Input: []tf.Input{ - input, filter, out_backprop, + input, fft_length, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. -type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) +// ResourceSparseApplyFtrlAttr is an optional argument to ResourceSparseApplyFtrl. +type ResourceSparseApplyFtrlAttr func(optionalAttr) -// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value. +// ResourceSparseApplyFtrlUseLocking sets the optional use_locking attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, height, width, channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, channels, height, width]. -// If not specified, defaults to "NHWC" -func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr { +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyFtrlUseLocking(value bool) ResourceSparseApplyFtrlAttr { return func(m optionalAttr) { - m["data_format"] = value + m["use_locking"] = value } } -// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value. 
+// Update relevant entries in '*var' according to the Ftrl-proximal scheme. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of depthwise convolution with respect to the filter. +// That is for rows we have grad for, we update var, accum and linear as follows: +// accum_new = accum + grad * grad +// linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new // // Arguments: -// input: 4-D with shape based on `data_format`. For example, if -// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height, -// in_width, in_channels]` tensor. -// filter_sizes: An integer vector representing the tensor shape of `filter`, -// where `filter` is a 4-D -// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor. -// out_backprop: 4-D with shape based on `data_format`. -// For example, if `data_format` is 'NHWC' then -// out_backprop shape is `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. -// padding: The type of padding algorithm to use. +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. 
+// indices: A vector of indices into the first dimension of var and accum. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// lr_power: Scaling factor. Must be a scalar. // -// Returns 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. -// the `filter` input of the convolution. -func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) { +// Returns the created operation. +func ResourceSparseApplyFtrl(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, indices tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, lr_power tf.Output, optional ...ResourceSparseApplyFtrlAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DepthwiseConv2dNativeBackpropFilter", + Type: "ResourceSparseApplyFtrl", Input: []tf.Input{ - input, filter_sizes, out_backprop, + var_, accum, linear, grad, indices, lr, l1, l2, lr_power, }, Attrs: attrs, } + return scope.AddOperation(opspec) +} + +// Returns which elements of x are Inf. +// +// @compatibility(numpy) +// Equivalent to np.isinf +// @end_compatibility +func IsInf(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IsInf", + Input: []tf.Input{ + x, + }, + } op := scope.AddOperation(opspec) return op.Output(0) } -// Converts each string in the input Tensor to its hash mod by a number of buckets. +// Gather ragged slices from `params` axis `0` according to `indices`. // -// The hash function is deterministic on the content of the string within the -// process. 
The hash function is a keyed hash function, where attribute `key` -// defines the key of the hash function. `key` is an array of 2 elements. +// Outputs a `RaggedTensor` output composed from `output_dense_values` and +// `output_nested_splits`, such that: +// +// ```python +// output.shape = indices.shape + params.shape[1:] +// output.ragged_rank = indices.shape.ndims + params.ragged_rank +// output[i...j, d0...dn] = params[indices[i...j], d0...dn] +// ``` +// +// where +// +// * `params = +// ragged.from_nested_row_splits(params_dense_values, params_nested_splits)` +// provides the values that should be gathered. +// * `indices` ia a dense tensor with dtype `int32` or `int64`, indicating which +// values should be gathered. +// * `output = +// ragged.from_nested_row_splits(output_dense_values, output_nested_splits)` +// is the output tensor. +// +// (Note: This c++ op is used to implement the higher-level python +// `tf.ragged.gather` op, which also supports ragged indices.) // -// A strong hash is important when inputs may be malicious, e.g. URLs with -// additional components. Adversaries could try to make their inputs hash to the -// same bucket for a denial-of-service attack or to skew the results. A strong -// hash prevents this by making it difficult, if not infeasible, to compute inputs -// that hash to the same bucket. This comes at a cost of roughly 4x higher compute -// time than `tf.string_to_hash_bucket_fast`. // // Arguments: -// input: The strings to assign a hash bucket. -// num_buckets: The number of buckets. -// key: The key for the keyed hash function passed as a list of two uint64 -// elements. +// params_nested_splits: The `nested_row_splits` tensors that define the row-partitioning for the +// `params` RaggedTensor input. +// params_dense_values: The `flat_values` for the `params` RaggedTensor. There was a terminology change +// at the python level from dense_values to flat_values, so dense_values is the +// deprecated name. 
+// indices: Indices in the outermost dimension of `params` of the values that should be +// gathered. +// OUTPUT_RAGGED_RANK: The ragged rank of the output RaggedTensor. `output_nested_splits` will contain +// this number of `row_splits` tensors. This value should equal +// `indices.shape.ndims + params.ragged_rank - 1`. // -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) { +// Returns The `nested_row_splits` tensors that define the row-partitioning for the +// returned RaggedTensor.The `flat_values` for the returned RaggedTensor. +func RaggedGather(scope *Scope, params_nested_splits []tf.Output, params_dense_values tf.Output, indices tf.Output, OUTPUT_RAGGED_RANK int64) (output_nested_splits []tf.Output, output_dense_values tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key} + attrs := map[string]interface{}{"OUTPUT_RAGGED_RANK": OUTPUT_RAGGED_RANK} opspec := tf.OpSpec{ - Type: "StringToHashBucketStrong", + Type: "RaggedGather", Input: []tf.Input{ - input, + tf.OutputList(params_nested_splits), params_dense_values, indices, }, Attrs: attrs, } op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if output_nested_splits, idx, err = makeOutputList(op, idx, "output_nested_splits"); err != nil { + scope.UpdateErr("RaggedGather", err) + return + } + output_dense_values = op.Output(idx) + return output_nested_splits, output_dense_values +} + +// Greedily selects a subset of bounding boxes in descending order of score, +// +// pruning away boxes that have high intersection-over-union (IOU) overlap +// with previously selected boxes. 
Bounding boxes are supplied as +// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any +// diagonal pair of box corners and the coordinates can be provided as normalized +// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm +// is agnostic to where the origin is in the coordinate system. Note that this +// algorithm is invariant to orthogonal transformations and translations +// of the coordinate system; thus translating or reflections of the coordinate +// system result in the same boxes being selected by the algorithm. +// +// The output of this operation is a set of integers indexing into the input +// collection of bounding boxes representing the selected boxes. The bounding +// box coordinates corresponding to the selected indices can then be obtained +// using the `tf.gather operation`. For example: +// +// selected_indices = tf.image.non_max_suppression_v2( +// boxes, scores, max_output_size, iou_threshold) +// selected_boxes = tf.gather(boxes, selected_indices) +// +// Arguments: +// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. +// scores: A 1-D float tensor of shape `[num_boxes]` representing a single +// score corresponding to each box (each row of boxes). +// max_output_size: A scalar integer tensor representing the maximum number of +// boxes to be selected by non max suppression. +// iou_threshold: A 0-D float tensor representing the threshold for deciding whether +// boxes overlap too much with respect to IOU. +// +// Returns A 1-D integer tensor of shape `[M]` representing the selected +// indices from the boxes tensor, where `M <= max_output_size`. 
+func NonMaxSuppressionV2(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output) (selected_indices tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "NonMaxSuppressionV2", + Input: []tf.Input{ + boxes, scores, max_output_size, iou_threshold, + }, + } + op := scope.AddOperation(opspec) return op.Output(0) } -// StringLengthAttr is an optional argument to StringLength. -type StringLengthAttr func(optionalAttr) +// TruncatedNormalAttr is an optional argument to TruncatedNormal. +type TruncatedNormalAttr func(optionalAttr) -// StringLengthUnit sets the optional unit attribute to value. +// TruncatedNormalSeed sets the optional seed attribute to value. // -// value: The unit that is counted to compute string length. One of: `"BYTE"` (for -// the number of bytes in each string) or `"UTF8_CHAR"` (for the number of UTF-8 -// encoded Unicode code points in each string). Results are undefined -// if `unit=UTF8_CHAR` and the `input` strings do not contain structurally -// valid UTF-8. -// If not specified, defaults to "BYTE" -func StringLengthUnit(value string) StringLengthAttr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func TruncatedNormalSeed(value int64) TruncatedNormalAttr { return func(m optionalAttr) { - m["unit"] = value + m["seed"] = value } } -// String lengths of `input`. +// TruncatedNormalSeed2 sets the optional seed2 attribute to value. // -// Computes the length of each string given in the input tensor. +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func TruncatedNormalSeed2(value int64) TruncatedNormalAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from a truncated normal distribution. 
+// +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. // // Arguments: -// input: The string for which to compute the length. +// shape: The shape of the output tensor. +// dtype: The type of the output. // -// Returns Integer tensor that has the same shape as `input`. The output contains the -// element-wise string lengths of `input`. -func StringLength(scope *Scope, input tf.Output, optional ...StringLengthAttr) (output tf.Output) { +// Returns A tensor of the specified shape filled with random truncated normal +// values. +func TruncatedNormal(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...TruncatedNormalAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "StringLength", + Type: "TruncatedNormal", Input: []tf.Input{ - input, + shape, }, Attrs: attrs, } @@ -19351,34 +18686,26 @@ func StringLength(scope *Scope, input tf.Output, optional ...StringLengthAttr) ( return op.Output(0) } -// ResourceApplyProximalGradientDescentAttr is an optional argument to ResourceApplyProximalGradientDescent. -type ResourceApplyProximalGradientDescentAttr func(optionalAttr) +// StringToNumberAttr is an optional argument to StringToNumber. +type StringToNumberAttr func(optionalAttr) -// ResourceApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. +// StringToNumberOutType sets the optional out_type attribute to value. // -// value: If True, the subtraction will be protected by a lock; -// otherwise the behavior is undefined, but may exhibit less contention. 
-// If not specified, defaults to false -func ResourceApplyProximalGradientDescentUseLocking(value bool) ResourceApplyProximalGradientDescentAttr { +// value: The numeric type to interpret each string in `string_tensor` as. +// If not specified, defaults to DT_FLOAT +func StringToNumberOutType(value tf.DataType) StringToNumberAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["out_type"] = value } } -// Update '*var' as FOBOS algorithm with fixed learning rate. -// -// prox_v = var - alpha * delta -// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} +// Converts each string in the input Tensor to the specified numeric type. // -// Arguments: -// var_: Should be from a Variable(). -// alpha: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// delta: The change. +// (Note that int32 overflow results in an error while float overflow +// results in a rounded value.) // -// Returns the created operation. -func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, delta tf.Output, optional ...ResourceApplyProximalGradientDescentAttr) (o *tf.Operation) { +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToNumber(scope *Scope, string_tensor tf.Output, optional ...StringToNumberAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -19387,254 +18714,244 @@ func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyProximalGradientDescent", + Type: "StringToNumber", Input: []tf.Input{ - var_, alpha, l1, l2, delta, + string_tensor, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Returns 0 if the denominator is zero. +// ResourceApplyFtrlV2Attr is an optional argument to ResourceApplyFtrlV2. 
+type ResourceApplyFtrlV2Attr func(optionalAttr) + +// ResourceApplyFtrlV2UseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyFtrlV2UseLocking(value bool) ResourceApplyFtrlV2Attr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the Ftrl-proximal scheme. // +// grad_with_shrinkage = grad + 2 * l2_shrinkage * var +// accum_new = accum + grad_with_shrinkage * grad_with_shrinkage +// linear += grad_with_shrinkage + +// (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var +// quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 +// var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 +// accum = accum_new // -// *NOTE*: `DivNoNan` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func DivNoNan(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// linear: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regulariation. Must be a scalar. +// l2: L2 shrinkage regulariation. Must be a scalar. +// +// lr_power: Scaling factor. Must be a scalar. +// +// Returns the created operation. 
+func ResourceApplyFtrlV2(scope *Scope, var_ tf.Output, accum tf.Output, linear tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, l2_shrinkage tf.Output, lr_power tf.Output, optional ...ResourceApplyFtrlV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "DivNoNan", + Type: "ResourceApplyFtrlV2", Input: []tf.Input{ - x, y, + var_, accum, linear, grad, lr, l1, l2, l2_shrinkage, lr_power, }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// UnicodeDecodeWithOffsetsAttr is an optional argument to UnicodeDecodeWithOffsets. -type UnicodeDecodeWithOffsetsAttr func(optionalAttr) +// SkipgramAttr is an optional argument to Skipgram. +type SkipgramAttr func(optionalAttr) -// UnicodeDecodeWithOffsetsErrors sets the optional errors attribute to value. +// SkipgramWindowSize sets the optional window_size attribute to value. // -// value: Error handling policy when there is invalid formatting found in the input. -// The value of 'strict' will cause the operation to produce a InvalidArgument -// error on any invalid input formatting. A value of 'replace' (the default) will -// cause the operation to replace any invalid formatting in the input with the -// `replacement_char` codepoint. A value of 'ignore' will cause the operation to -// skip any invalid formatting in the input and produce no corresponding output -// character. -// If not specified, defaults to "replace" -func UnicodeDecodeWithOffsetsErrors(value string) UnicodeDecodeWithOffsetsAttr { +// value: The number of words to predict to the left and right of the target. 
+// If not specified, defaults to 5 +func SkipgramWindowSize(value int64) SkipgramAttr { return func(m optionalAttr) { - m["errors"] = value + m["window_size"] = value } } -// UnicodeDecodeWithOffsetsReplacementChar sets the optional replacement_char attribute to value. +// SkipgramMinCount sets the optional min_count attribute to value. // -// value: The replacement character codepoint to be used in place of any invalid -// formatting in the input when `errors='replace'`. Any valid unicode codepoint may -// be used. The default value is the default unicode replacement character is -// 0xFFFD or U+65533.) -// If not specified, defaults to 65533 -func UnicodeDecodeWithOffsetsReplacementChar(value int64) UnicodeDecodeWithOffsetsAttr { +// value: The minimum number of word occurrences for it to be included in the +// vocabulary. +// If not specified, defaults to 5 +func SkipgramMinCount(value int64) SkipgramAttr { return func(m optionalAttr) { - m["replacement_char"] = value + m["min_count"] = value } } -// UnicodeDecodeWithOffsetsReplaceControlCharacters sets the optional replace_control_characters attribute to value. +// SkipgramSubsample sets the optional subsample attribute to value. // -// value: Whether to replace the C0 control characters (00-1F) with the -// `replacement_char`. Default is false. -// If not specified, defaults to false -func UnicodeDecodeWithOffsetsReplaceControlCharacters(value bool) UnicodeDecodeWithOffsetsAttr { +// value: Threshold for word occurrence. Words that appear with higher +// frequency will be randomly down-sampled. Set to 0 to disable. +// If not specified, defaults to 0.001 +func SkipgramSubsample(value float32) SkipgramAttr { return func(m optionalAttr) { - m["replace_control_characters"] = value + m["subsample"] = value } } -// Decodes each string in `input` into a sequence of Unicode code points. 
-// -// The character codepoints for all strings are returned using a single vector -// `char_values`, with strings expanded to characters in row-major order. -// Similarly, the character start byte offsets are returned using a single vector -// `char_to_byte_starts`, with strings expanded in row-major order. -// -// The `row_splits` tensor indicates where the codepoints and start offsets for -// each input string begin and end within the `char_values` and -// `char_to_byte_starts` tensors. In particular, the values for the `i`th -// string (in row-major order) are stored in the slice -// `[row_splits[i]:row_splits[i+1]]`. Thus: +// Parses a text file and creates a batch of examples. // -// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th -// character in the `i`th string (in row-major order). -// * `char_to_bytes_starts[row_splits[i]+j]` is the start byte offset for the `j`th -// character in the `i`th string (in row-major order). -// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th -// string (in row-major order). +// DEPRECATED at GraphDef version 19: Moving word2vec into tensorflow_models/tutorials and deprecating its ops here as a result // // Arguments: -// input: The text to be decoded. Can have any shape. Note that the output is flattened -// to a vector of char values. -// input_encoding: Text encoding of the input strings. This is any of the encodings supported -// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. +// filename: The corpus's text file name. +// batch_size: The size of produced batch. // -// Returns A 1D int32 tensor containing the row splits.A 1D int32 Tensor containing the decoded codepoints.A 1D int32 Tensor containing the byte index in the input string where each -// character in `char_values` starts. 
-func UnicodeDecodeWithOffsets(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeWithOffsetsAttr) (row_splits tf.Output, char_values tf.Output, char_to_byte_starts tf.Output) { +// Returns A vector of words in the corpus.Frequencies of words. Sorted in the non-ascending order.Number of words per epoch in the data file.The current epoch number.The total number of words processed so far.A vector of word ids.A vector of word ids. +func Skipgram(scope *Scope, filename string, batch_size int64, optional ...SkipgramAttr) (vocab_word tf.Output, vocab_freq tf.Output, words_per_epoch tf.Output, current_epoch tf.Output, total_words_processed tf.Output, examples tf.Output, labels tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"input_encoding": input_encoding} + attrs := map[string]interface{}{"filename": filename, "batch_size": batch_size} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "UnicodeDecodeWithOffsets", - Input: []tf.Input{ - input, - }, + Type: "Skipgram", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4), op.Output(5), op.Output(6) } -// Returns x - y element-wise. +// ThreadUnsafeUnigramCandidateSamplerAttr is an optional argument to ThreadUnsafeUnigramCandidateSampler. +type ThreadUnsafeUnigramCandidateSamplerAttr func(optionalAttr) + +// ThreadUnsafeUnigramCandidateSamplerSeed sets the optional seed attribute to value. // -// *NOTE*: `Subtract` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Sub(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Sub", - Input: []tf.Input{ - x, y, - }, +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func ThreadUnsafeUnigramCandidateSamplerSeed(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns the max of x and y (i.e. x > y ? x : y) element-wise. +// ThreadUnsafeUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. // -// *NOTE*: `Maximum` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Maximum", - Input: []tf.Input{ - x, y, - }, +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func ThreadUnsafeUnigramCandidateSamplerSeed2(value int64) ThreadUnsafeUnigramCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed2"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes softmax cross entropy cost and gradients to backpropagate. +// Generates labels for candidate sampling with a learned unigram distribution. // -// Inputs are the logits, not probabilities. +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. +// +// For each batch, this op picks a single set of sampled candidate labels. +// +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. 
The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. // // Arguments: -// features: batch_size x num_classes matrix -// labels: batch_size x num_classes matrix -// The caller must ensure that each batch of labels represents a valid -// probability distribution. +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to randomly sample. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. +// range_max: The sampler will sample integers from the interval [0, range_max). // -// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix). -func SoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. 
+func ThreadUnsafeUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...ThreadUnsafeUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SoftmaxCrossEntropyWithLogits", + Type: "ThreadUnsafeUnigramCandidateSampler", Input: []tf.Input{ - features, labels, + true_classes, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2) } -// ReduceJoinAttr is an optional argument to ReduceJoin. -type ReduceJoinAttr func(optionalAttr) - -// ReduceJoinKeepDims sets the optional keep_dims attribute to value. -// -// value: If `True`, retain reduced dimensions with length `1`. -// If not specified, defaults to false -func ReduceJoinKeepDims(value bool) ReduceJoinAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } -} +// MaxPoolV2Attr is an optional argument to MaxPoolV2. +type MaxPoolV2Attr func(optionalAttr) -// ReduceJoinSeparator sets the optional separator attribute to value. +// MaxPoolV2DataFormat sets the optional data_format attribute to value. // -// value: The separator to use when joining. -// If not specified, defaults to "" -func ReduceJoinSeparator(value string) ReduceJoinAttr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. 
+// If not specified, defaults to "NHWC" +func MaxPoolV2DataFormat(value string) MaxPoolV2Attr { return func(m optionalAttr) { - m["separator"] = value + m["data_format"] = value } } -// Joins a string Tensor across the given dimensions. -// -// Computes the string join across dimensions in the given string Tensor of shape -// `[\\(d_0, d_1, ..., d_{n-1}\\)]`. Returns a new Tensor created by joining the input -// strings with the given separator (default: empty string). Negative indices are -// counted backwards from the end, with `-1` being equivalent to `n - 1`. If -// indices are not specified, joins across all dimensions beginning from `n - 1` -// through `0`. -// -// For example: -// -// ```python -// # tensor `a` is [["a", "b"], ["c", "d"]] -// tf.reduce_join(a, 0) ==> ["ac", "bd"] -// tf.reduce_join(a, 1) ==> ["ab", "cd"] -// tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"] -// tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"] -// tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]] -// tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]] -// tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"] -// tf.reduce_join(a, [0, 1]) ==> "acbd" -// tf.reduce_join(a, [1, 0]) ==> "abcd" -// tf.reduce_join(a, []) ==> [["a", "b"], ["c", "d"]] -// tf.reduce_join(a) = tf.reduce_join(a, [1, 0]) ==> "abcd" -// ``` +// Performs max pooling on the input. // // Arguments: -// inputs: The input to be joined. All reduced indices must have non-zero size. -// reduction_indices: The dimensions to reduce over. Dimensions are reduced in the -// order specified. Omitting `reduction_indices` is equivalent to passing -// `[n-1, n-2, ..., 0]`. Negative indices from `-n` to `-1` are supported. +// input: 4-D input to pool over. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. 
// -// Returns Has shape equal to that of the input with reduced dimensions removed or -// set to `1` depending on `keep_dims`. -func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, optional ...ReduceJoinAttr) (output tf.Output) { +// Returns The max pooled output tensor. +func MaxPoolV2(scope *Scope, input tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolV2Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ReduceJoin", + Type: "MaxPoolV2", Input: []tf.Input{ - inputs, reduction_indices, + input, ksize, strides, }, Attrs: attrs, } @@ -19642,165 +18959,137 @@ func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, opt return op.Output(0) } -// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation. -type DenseToSparseSetOperationAttr func(optionalAttr) - -// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Applies set operation along last dimension of `Tensor` and `SparseTensor`. -// -// See SetOperationOp::SetOperationFromContext for values of `set_operation`. -// -// Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, -// and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same -// as `set1`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. -// -// If `validate_indices` is `True`, this op validates the order and range of `set2` -// indices. -// -// Output `result` is a `SparseTensor` represented by `result_indices`, -// `result_values`, and `result_shape`. 
For `set1` and `set2` ranked `n`, this -// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` -// dimension contains the result of `set_operation` applied to the corresponding -// `[0...n-1]` dimension of `set`. -// -// Arguments: -// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. -// Dimension `n` contains values in a set, duplicates are allowed but ignored. -// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must -// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the -// max set size across `n-1` dimensions. +// Does nothing. Serves as a control trigger for scheduling. // +// Only useful as a placeholder for control edges. // -// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is -// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` -// is the max result set size across all `0...n-1` dimensions. -func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { +// Returns the created operation. 
+func ControlTrigger(scope *Scope) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"set_operation": set_operation} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "DenseToSparseSetOperation", - Input: []tf.Input{ - set1, set2_indices, set2_values, set2_shape, - }, - Attrs: attrs, + Type: "ControlTrigger", } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// L2 Loss. -// -// Computes half the L2 norm of a tensor without the `sqrt`: -// -// output = sum(t ** 2) / 2 -// -// Arguments: -// t: Typically 2-D, but may have any dimensions. +// Deprecated. Use TensorArrayReadV3 // -// Returns 0-D. -func L2Loss(scope *Scope, t tf.Output) (output tf.Output) { +// DEPRECATED at GraphDef version 26: Use TensorArrayReadV3 +func TensorArrayReadV2(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "L2Loss", + Type: "TensorArrayReadV2", Input: []tf.Input{ - t, + handle, index, flow_in, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes cos of x element-wise. -func Cos(scope *Scope, x tf.Output) (y tf.Output) { +// Batch normalization. +// +// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() +// +// This op is deprecated. Prefer `tf.nn.batch_normalization`. +// +// Arguments: +// t: A 4D input Tensor. +// m: A 1D mean Tensor with size matching the last dimension of t. +// This is the first output from tf.nn.moments, +// or a saved moving average thereof. +// v: A 1D variance Tensor with size matching the last dimension of t. +// This is the second output from tf.nn.moments, +// or a saved moving average thereof. +// beta: A 1D beta Tensor with size matching the last dimension of t. 
+// An offset to be added to the normalized tensor. +// gamma: A 1D gamma Tensor with size matching the last dimension of t. +// If "scale_after_normalization" is true, this tensor will be multiplied +// with the normalized tensor. +// variance_epsilon: A small float number to avoid dividing by 0. +// scale_after_normalization: A bool indicating whether the resulted tensor +// needs to be multiplied with gamma. +func BatchNormWithGlobalNormalization(scope *Scope, t tf.Output, m tf.Output, v tf.Output, beta tf.Output, gamma tf.Output, variance_epsilon float32, scale_after_normalization bool) (result tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} opspec := tf.OpSpec{ - Type: "Cos", + Type: "BatchNormWithGlobalNormalization", Input: []tf.Input{ - x, + t, m, v, beta, gamma, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad. -type FusedBatchNormGradAttr func(optionalAttr) +// AddManySparseToTensorsMapAttr is an optional argument to AddManySparseToTensorsMap. +type AddManySparseToTensorsMapAttr func(optionalAttr) -// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value. +// AddManySparseToTensorsMapContainer sets the optional container attribute to value. // -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr { +// value: The container name for the `SparseTensorsMap` created by this op. +// If not specified, defaults to "" +func AddManySparseToTensorsMapContainer(value string) AddManySparseToTensorsMapAttr { return func(m optionalAttr) { - m["epsilon"] = value + m["container"] = value } } -// FusedBatchNormGradDataFormat sets the optional data_format attribute to value. 
+// AddManySparseToTensorsMapSharedName sets the optional shared_name attribute to value. // -// value: The data format for y_backprop, x, x_backprop. -// Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr { +// value: The shared name for the `SparseTensorsMap` created by this op. +// If blank, the new Operation's unique name is used. +// If not specified, defaults to "" +func AddManySparseToTensorsMapSharedName(value string) AddManySparseToTensorsMapAttr { return func(m optionalAttr) { - m["data_format"] = value + m["shared_name"] = value } } -// FusedBatchNormGradIsTraining sets the optional is_training attribute to value. +// Add an `N`-minibatch `SparseTensor` to a `SparseTensorsMap`, return `N` handles. // -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr { - return func(m optionalAttr) { - m["is_training"] = value - } -} - -// Gradient for batch normalization. +// A `SparseTensor` of rank `R` is represented by three tensors: `sparse_indices`, +// `sparse_values`, and `sparse_shape`, where // -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. +// ```sparse_indices.shape[1] == sparse_shape.shape[0] == R``` +// +// An `N`-minibatch of `SparseTensor` objects is represented as a `SparseTensor` +// having a first `sparse_indices` column taking values between `[0, N)`, where +// the minibatch size `N == sparse_shape[0]`. +// +// The input `SparseTensor` must have rank `R` greater than 1, and the first +// dimension is treated as the minibatch dimension. Elements of the `SparseTensor` +// must be sorted in increasing order of this first dimension. 
The stored +// `SparseTensor` objects pointed to by each row of the output `sparse_handles` +// will have rank `R-1`. +// +// The `SparseTensor` values can then be read out as part of a minibatch by passing +// the given keys as vector elements to `TakeManySparseFromTensorsMap`. To ensure +// the correct `SparseTensorsMap` is accessed, ensure that the same +// `container` and `shared_name` are passed to that Op. If no `shared_name` +// is provided here, instead use the *name* of the Operation created by calling +// `AddManySparseToTensorsMap` as the `shared_name` passed to +// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. // // Arguments: -// y_backprop: A 4D Tensor for the gradient with respect to y. -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch -// mean to be reused in gradient computation. When is_training is -// False, a 1D Tensor for the population mean to be reused in both -// 1st and 2nd order gradient computation. -// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch -// variance (inverted variance in the cuDNN case) to be reused in -// gradient computation. When is_training is False, a 1D Tensor -// for the population variance to be reused in both 1st and 2nd -// order gradient computation. +// sparse_indices: 2-D. The `indices` of the minibatch `SparseTensor`. +// `sparse_indices[:, 0]` must be ordered values in `[0, N)`. +// sparse_values: 1-D. The `values` of the minibatch `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the minibatch `SparseTensor`. +// The minibatch size `N == sparse_shape[0]`. 
// -// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input -// in FusedBatchNorm. -func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) { +// Returns 1-D. The handles of the `SparseTensor` now stored in the +// `SparseTensorsMap`. Shape: `[N]`. +func AddManySparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddManySparseToTensorsMapAttr) (sparse_handles tf.Output) { if scope.Err() != nil { return } @@ -19809,200 +19098,243 @@ func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale t a(attrs) } opspec := tf.OpSpec{ - Type: "FusedBatchNormGrad", + Type: "AddManySparseToTensorsMap", Input: []tf.Input{ - y_backprop, x, scale, reserve_space_1, reserve_space_2, + sparse_indices, sparse_values, sparse_shape, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) + return op.Output(0) } -// TopKAttr is an optional argument to TopK. -type TopKAttr func(optionalAttr) +// TPUReplicateMetadataAttr is an optional argument to TPUReplicateMetadata. +type TPUReplicateMetadataAttr func(optionalAttr) -// TopKSorted sets the optional sorted attribute to value. +// TPUReplicateMetadataNumCoresPerReplica sets the optional num_cores_per_replica attribute to value. // -// value: If true the resulting `k` elements will be sorted by the values in -// descending order. 
-// If not specified, defaults to true -func TopKSorted(value bool) TopKAttr { +// value: Number of cores per replica. Used for model parallelism. +// If not specified, defaults to 1 +func TPUReplicateMetadataNumCoresPerReplica(value int64) TPUReplicateMetadataAttr { return func(m optionalAttr) { - m["sorted"] = value + m["num_cores_per_replica"] = value } } -// Finds values and indices of the `k` largest elements for the last dimension. -// -// DEPRECATED at GraphDef version 7: Use TopKV2 instead -// -// If the input is a vector (rank-1), finds the `k` largest entries in the vector -// and outputs their values and indices as vectors. Thus `values[j]` is the -// `j`-th largest entry in `input`, and its index is `indices[j]`. -// -// For matrices (resp. higher rank input), computes the top `k` entries in each -// row (resp. vector along the last dimension). Thus, -// -// values.shape = indices.shape = input.shape[:-1] + [k] +// TPUReplicateMetadataTopology sets the optional topology attribute to value. // -// If two elements are equal, the lower-index element appears first. +// value: TopologyProto indicating the topology of the TPU pod slice. +// If not specified, defaults to "" +func TPUReplicateMetadataTopology(value string) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["topology"] = value + } +} + +// TPUReplicateMetadataUseTpu sets the optional use_tpu attribute to value. // -// If `k` varies dynamically, use `TopKV2` below. +// value: Whether to place the computation on the TPU. +// If not specified, defaults to true +func TPUReplicateMetadataUseTpu(value bool) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["use_tpu"] = value + } +} + +// TPUReplicateMetadataDeviceAssignment sets the optional device_assignment attribute to value. // -// Arguments: -// input: 1-D or higher with last dimension at least `k`. -// k: Number of top elements to look for along the last dimension (along each -// row for matrices). 
+// value: The assignment of devices for the computation. +// If not specified, defaults to <> +func TPUReplicateMetadataDeviceAssignment(value []int64) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["device_assignment"] = value + } +} + +// TPUReplicateMetadataComputationShape sets the optional computation_shape attribute to value. // -// Returns The `k` largest elements along each last dimensional slice.The indices of `values` within the last dimension of `input`. -func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values tf.Output, indices tf.Output) { - if scope.Err() != nil { - return +// value: DEPRECATED. Use num_cores_per_replica instead. +// If not specified, defaults to <> +func TPUReplicateMetadataComputationShape(value []int64) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["computation_shape"] = value } - attrs := map[string]interface{}{"k": k} - for _, a := range optional { - a(attrs) +} + +// TPUReplicateMetadataHostComputeCore sets the optional host_compute_core attribute to value. +// If not specified, defaults to <> +func TPUReplicateMetadataHostComputeCore(value []string) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["host_compute_core"] = value } - opspec := tf.OpSpec{ - Type: "TopK", - Input: []tf.Input{ - input, - }, - Attrs: attrs, +} + +// TPUReplicateMetadataPaddingMap sets the optional padding_map attribute to value. +// If not specified, defaults to <> +func TPUReplicateMetadataPaddingMap(value []string) TPUReplicateMetadataAttr { + return func(m optionalAttr) { + m["padding_map"] = value } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) } -// Converts each string in the input Tensor to its hash mod by a number of buckets. -// -// The hash function is deterministic on the content of the string within the -// process. -// -// Note that the hash function may change from time to time. 
-// This functionality will be deprecated and it's recommended to use -// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`. +// Metadata indicaitng how the TPU computation should be replicated. // // Arguments: +// num_replicas: Number of replicas of the computation // -// num_buckets: The number of buckets. -// -// Returns A Tensor of the same shape as the input `string_tensor`. -func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) { +// Returns the created operation. +func TPUReplicateMetadata(scope *Scope, num_replicas int64, optional ...TPUReplicateMetadataAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_buckets": num_buckets} + attrs := map[string]interface{}{"num_replicas": num_replicas} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "StringToHashBucket", - Input: []tf.Input{ - string_tensor, - }, + Type: "TPUReplicateMetadata", + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// StaticRegexReplaceAttr is an optional argument to StaticRegexReplace. -type StaticRegexReplaceAttr func(optionalAttr) +// LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingFTRLParametersGradAccumDebug. +type LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr func(optionalAttr) -// StaticRegexReplaceReplaceGlobal sets the optional replace_global attribute to value. +// LoadTPUEmbeddingFTRLParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: If True, the replacement is global, otherwise the replacement -// is done only on the first match. 
-// If not specified, defaults to true -func StaticRegexReplaceReplaceGlobal(value bool) StaticRegexReplaceAttr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingFTRLParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["replace_global"] = value + m["table_id"] = value } } -// Replaces the match of pattern in input with rewrite. +// LoadTPUEmbeddingFTRLParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingFTRLParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load FTRL embedding parameters with debug support. // -// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // // Arguments: -// input: The text to be processed. -// pattern: The regular expression to match the input. -// rewrite: The rewrite to be applied to the matched expression. +// parameters: Value of parameters used in the FTRL optimization algorithm. +// accumulators: Value of accumulators used in the FTRL optimization algorithm. +// linears: Value of linears used in the FTRL optimization algorithm. +// gradient_accumulators: Value of gradient_accumulators used in the FTRL optimization algorithm. // -// Returns The text after applying pattern and rewrite. -func StaticRegexReplace(scope *Scope, input tf.Output, pattern string, rewrite string, optional ...StaticRegexReplaceAttr) (output tf.Output) { +// +// +// Returns the created operation. 
+func LoadTPUEmbeddingFTRLParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, linears tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingFTRLParametersGradAccumDebugAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"pattern": pattern, "rewrite": rewrite} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "StaticRegexReplace", + Type: "LoadTPUEmbeddingFTRLParametersGradAccumDebug", Input: []tf.Input{ - input, + parameters, accumulators, linears, gradient_accumulators, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes gradients for the exponential linear (Elu) operation. +// Concatenates tensors along one dimension. // // Arguments: -// gradients: The backpropagated gradients to the corresponding Elu operation. -// outputs: The outputs of the corresponding Elu operation. +// values: List of `N` Tensors to concatenate. Their ranks and types must match, +// and their sizes must match in all dimensions except `concat_dim`. +// axis: 0-D. The dimension along which to concatenate. Must be in the +// range [-rank(values), rank(values)). // -// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0, -// `gradients` otherwise. -func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) { +// Returns A `Tensor` with the concatenation of values stacked along the +// `concat_dim` dimension. This tensor's shape matches that of `values` except +// in `concat_dim` where it has the sum of the sizes. 
+func ConcatV2(scope *Scope, values []tf.Output, axis tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "EluGrad", + Type: "ConcatV2", Input: []tf.Input{ - gradients, outputs, + tf.OutputList(values), axis, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the gradient of `igamma(a, x)` wrt `a`. -func IgammaGradA(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { +// Reads and outputs the entire contents of the input filename. +func ReadFile(scope *Scope, filename tf.Output) (contents tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IgammaGradA", + Type: "ReadFile", Input: []tf.Input{ - a, x, + filename, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that contains `count` elements from the `input_dataset`. -// -// Arguments: +// AvgPoolGradAttr is an optional argument to AvgPoolGrad. +type AvgPoolGradAttr func(optionalAttr) + +// AvgPoolGradDataFormat sets the optional data_format attribute to value. // -// count: A scalar representing the number of elements from the `input_dataset` -// that should be taken. A value of `-1` indicates that all of `input_dataset` -// is taken. +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func AvgPoolGradDataFormat(value string) AvgPoolGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Computes gradients of the average pooling function. // +// Arguments: +// orig_input_shape: 1-D. Shape of the original input to `avg_pool`. +// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. +// the output of `avg_pool`. 
+// ksize: The size of the sliding window for each dimension of the input. +// strides: The stride of the sliding window for each dimension of the input. +// padding: The type of padding algorithm to use. // -func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { +// Returns 4-D. Gradients w.r.t. the input of `avg_pool`. +func AvgPoolGrad(scope *Scope, orig_input_shape tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolGradAttr) (output tf.Output) { + if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TakeDataset", + Type: "AvgPoolGrad", Input: []tf.Input{ - input_dataset, count, + orig_input_shape, grad, }, Attrs: attrs, } @@ -20010,226 +19342,222 @@ func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_ return op.Output(0) } -// The gradient operator for the SparseAdd op. +// Greedily selects a subset of bounding boxes in descending order of score, // -// The SparseAdd op calculates A + B, where A, B, and the sum are all represented -// as `SparseTensor` objects. This op takes in the upstream gradient w.r.t. -// non-empty values of the sum, and outputs the gradients w.r.t. the non-empty -// values of A and B. +// pruning away boxes that have high overlaps +// with previously selected boxes. Bounding boxes with score less than +// `score_threshold` are removed. N-by-n overlap values are supplied as square matrix, +// which allows for defining a custom overlap criterium (eg. intersection over union, +// intersection over area, etc.). 
+// +// The output of this operation is a set of integers indexing into the input +// collection of bounding boxes representing the selected boxes. The bounding +// box coordinates corresponding to the selected indices can then be obtained +// using the `tf.gather operation`. For example: +// +// selected_indices = tf.image.non_max_suppression_with_overlaps( +// overlaps, scores, max_output_size, overlap_threshold, score_threshold) +// selected_boxes = tf.gather(boxes, selected_indices) // // Arguments: -// backprop_val_grad: 1-D with shape `[nnz(sum)]`. The gradient with respect to -// the non-empty values of the sum. -// a_indices: 2-D. The `indices` of the `SparseTensor` A, size `[nnz(A), ndims]`. -// b_indices: 2-D. The `indices` of the `SparseTensor` B, size `[nnz(B), ndims]`. -// sum_indices: 2-D. The `indices` of the sum `SparseTensor`, size -// `[nnz(sum), ndims]`. +// overlaps: A 2-D float tensor of shape `[num_boxes, num_boxes]` representing +// the n-by-n box overlap values. +// scores: A 1-D float tensor of shape `[num_boxes]` representing a single +// score corresponding to each box (each row of boxes). +// max_output_size: A scalar integer tensor representing the maximum number of +// boxes to be selected by non max suppression. +// overlap_threshold: A 0-D float tensor representing the threshold for deciding whether +// boxes overlap too. +// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove +// boxes based on score. // -// Returns 1-D with shape `[nnz(A)]`. The gradient with respect to the -// non-empty values of A.1-D with shape `[nnz(B)]`. The gradient with respect to the -// non-empty values of B. -func SparseAddGrad(scope *Scope, backprop_val_grad tf.Output, a_indices tf.Output, b_indices tf.Output, sum_indices tf.Output) (a_val_grad tf.Output, b_val_grad tf.Output) { +// Returns A 1-D integer tensor of shape `[M]` representing the selected +// indices from the boxes tensor, where `M <= max_output_size`. 
+func NonMaxSuppressionWithOverlaps(scope *Scope, overlaps tf.Output, scores tf.Output, max_output_size tf.Output, overlap_threshold tf.Output, score_threshold tf.Output) (selected_indices tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseAddGrad", + Type: "NonMaxSuppressionWithOverlaps", Input: []tf.Input{ - backprop_val_grad, a_indices, b_indices, sum_indices, + overlaps, scores, max_output_size, overlap_threshold, score_threshold, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Returns a list of tensors with the same shapes and contents as the input +// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad. +type FractionalAvgPoolGradAttr func(optionalAttr) + +// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value. // -// tensors. +// value: When set to True, it means when pooling, the values at the boundary +// of adjacent pooling cells are used by both cells. For example: // -// This op can be used to override the gradient for complicated functions. For -// example, suppose y = f(x) and we wish to apply a custom function g for backprop -// such that dx = g(dy). In Python, +// `index 0 1 2 3 4` // -// ```python -// with tf.get_default_graph().gradient_override_map( -// {'IdentityN': 'OverrideGradientWithG'}): -// y, _ = identity_n([f(x), x]) +// `value 20 5 16 3 7` // -// @tf.RegisterGradient('OverrideGradientWithG') -// def ApplyG(op, dy, _): -// return [None, g(dy)] # Do not backprop to f(x). -// ``` -func IdentityN(scope *Scope, input []tf.Output) (output []tf.Output) { +// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. +// The result would be [41/3, 26/3] for fractional avg pooling. 
+// If not specified, defaults to false +func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr { + return func(m optionalAttr) { + m["overlapping"] = value + } +} + +// Computes gradient of the FractionalAvgPool function. +// +// Unlike FractionalMaxPoolGrad, we don't need to find arg_max for +// FractionalAvgPoolGrad, we just need to evenly back-propagate each element of +// out_backprop to those indices that form the same pooling cell. Therefore, we +// just need to know the shape of original input tensor, instead of the whole +// tensor. +// +// Arguments: +// orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool` +// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients +// w.r.t. the output of `fractional_avg_pool`. +// row_pooling_sequence: row pooling sequence, form pooling region with +// col_pooling_sequence. +// col_pooling_sequence: column pooling sequence, form pooling region with +// row_pooling sequence. +// +// Returns 4-D. Gradients w.r.t. the input of `fractional_avg_pool`. +func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "IdentityN", + Type: "FractionalAvgPoolGrad", Input: []tf.Input{ - tf.OutputList(input), + orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("IdentityN", err) - return - } - return output + return op.Output(0) } -// ResourceApplyCenteredRMSPropAttr is an optional argument to ResourceApplyCenteredRMSProp. 
-type ResourceApplyCenteredRMSPropAttr func(optionalAttr) +// StaticRegexReplaceAttr is an optional argument to StaticRegexReplace. +type StaticRegexReplaceAttr func(optionalAttr) -// ResourceApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. +// StaticRegexReplaceReplaceGlobal sets the optional replace_global attribute to value. // -// value: If `True`, updating of the var, mg, ms, and mom tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyCenteredRMSPropUseLocking(value bool) ResourceApplyCenteredRMSPropAttr { +// value: If True, the replacement is global, otherwise the replacement +// is done only on the first match. +// If not specified, defaults to true +func StaticRegexReplaceReplaceGlobal(value bool) StaticRegexReplaceAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["replace_global"] = value } } -// Update '*var' according to the centered RMSProp algorithm. -// -// The centered RMSProp algorithm uses an estimate of the centered second moment -// (i.e., the variance) for normalization, as opposed to regular RMSProp, which -// uses the (uncentered) second moment. This often helps with training, but is -// slightly more expensive in terms of computation and memory. -// -// Note that in dense implementation of this algorithm, mg, ms, and mom will -// update even if the grad is zero, but in this sparse implementation, mg, ms, -// and mom will not update in iterations during which the grad is zero. -// -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// mean_grad = decay * mean_grad + (1-decay) * gradient -// -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// Replaces the match of pattern in input with rewrite. 
// -// mg <- rho * mg_{t-1} + (1-rho) * grad -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon) -// var <- var - mom +// It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) // // Arguments: -// var_: Should be from a Variable(). -// mg: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. -// -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. +// input: The text to be processed. +// pattern: The regular expression to match the input. +// rewrite: The rewrite to be applied to the matched expression. // -// Returns the created operation. -func ResourceApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyCenteredRMSPropAttr) (o *tf.Operation) { +// Returns The text after applying pattern and rewrite. +func StaticRegexReplace(scope *Scope, input tf.Output, pattern string, rewrite string, optional ...StaticRegexReplaceAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"pattern": pattern, "rewrite": rewrite} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyCenteredRMSProp", + Type: "StaticRegexReplace", Input: []tf.Input{ - var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, + input, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp. -type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr) - -// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. 
+// Computes gradients for the exponential linear (Elu) operation. // -// value: If `True`, updating of the var, mg, ms, and mom tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr { - return func(m optionalAttr) { - m["use_locking"] = value +// Arguments: +// gradients: The backpropagated gradients to the corresponding Elu operation. +// outputs: The outputs of the corresponding Elu operation. +// +// Returns The gradients: `gradients * (outputs + 1)` if outputs < 0, +// `gradients` otherwise. +func EluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "EluGrad", + Input: []tf.Input{ + gradients, outputs, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Update '*var' according to the centered RMSProp algorithm. -// -// The centered RMSProp algorithm uses an estimate of the centered second moment -// (i.e., the variance) for normalization, as opposed to regular RMSProp, which -// uses the (uncentered) second moment. This often helps with training, but is -// slightly more expensive in terms of computation and memory. -// -// Note that in dense implementation of this algorithm, mg, ms, and mom will -// update even if the grad is zero, but in this sparse implementation, mg, ms, -// and mom will not update in iterations during which the grad is zero. +// Converts each string in the input Tensor to its hash mod by a number of buckets. // -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// mean_grad = decay * mean_grad + (1-decay) * gradient -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// The hash function is deterministic on the content of the string within the +// process. 
// -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom +// Note that the hash function may change from time to time. +// This functionality will be deprecated and it's recommended to use +// `tf.string_to_hash_bucket_fast()` or `tf.string_to_hash_bucket_strong()`. // // Arguments: -// var_: Should be from a Variable(). -// mg: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// rho: Decay rate. Must be a scalar. // -// epsilon: Ridge term. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var, ms and mom. +// num_buckets: The number of buckets. // -// Returns the created operation. -func ResourceSparseApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyCenteredRMSPropAttr) (o *tf.Operation) { +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToHashBucket(scope *Scope, string_tensor tf.Output, num_buckets int64) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"num_buckets": num_buckets} opspec := tf.OpSpec{ - Type: "ResourceSparseApplyCenteredRMSProp", + Type: "StringToHashBucket", Input: []tf.Input{ - var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, indices, + string_tensor, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } // Creates a dataset that batches `batch_size` elements from `input_dataset`. // // Arguments: // -// batch_size: A scalar representing the number of elements to accumulate in a -// batch. 
+// batch_size: A scalar representing the number of elements to accumulate in a batch. +// drop_remainder: A scalar representing whether the last batch should be dropped in case its size +// is smaller than desired. // // -func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +func BatchDatasetV2(scope *Scope, input_dataset tf.Output, batch_size tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "BatchDataset", + Type: "BatchDatasetV2", Input: []tf.Input{ - input_dataset, batch_size, + input_dataset, batch_size, drop_remainder, }, Attrs: attrs, } @@ -20237,61 +19565,78 @@ func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, o return op.Output(0) } -// RandomPoissonV2Attr is an optional argument to RandomPoissonV2. -type RandomPoissonV2Attr func(optionalAttr) +// Computes the gradient of `igamma(a, x)` wrt `a`. +func IgammaGradA(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IgammaGradA", + Input: []tf.Input{ + a, x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// RandomPoissonV2Seed sets the optional seed attribute to value. +// Creates a dataset that contains `count` elements from the `input_dataset`. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. 
-// If not specified, defaults to 0 -func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr { - return func(m optionalAttr) { - m["seed"] = value +// Arguments: +// +// count: A scalar representing the number of elements from the `input_dataset` +// that should be taken. A value of `-1` indicates that all of `input_dataset` +// is taken. +// +// +func TakeDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "TakeDataset", + Input: []tf.Input{ + input_dataset, count, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// RandomPoissonV2Seed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr { +// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. +type FakeQuantWithMinMaxVarsAttr func(optionalAttr) + +// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. +// If not specified, defaults to 8 +func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { return func(m optionalAttr) { - m["seed2"] = value + m["num_bits"] = value } } -// RandomPoissonV2Dtype sets the optional dtype attribute to value. -// If not specified, defaults to DT_INT64 -func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr { +// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. +// If not specified, defaults to false +func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { return func(m optionalAttr) { - m["dtype"] = value + m["narrow_range"] = value } } -// Outputs random values from the Poisson distribution(s) described by rate. 
-// -// This op uses two algorithms, depending on rate. If rate >= 10, then -// the algorithm by Hormann is used to acquire samples via -// transformation-rejection. -// See http://www.sciencedirect.com/science/article/pii/0167668793909974. +// Fake-quantize the 'inputs' tensor of type float via global float scalars `min` // -// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform -// random variables. -// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer -// Programming, Volume 2. Addison Wesley +// and `max` to 'outputs' tensor of same shape as `inputs`. // -// Arguments: -// shape: 1-D integer tensor. Shape of independent samples to draw from each -// distribution described by the shape parameters given in rate. -// rate: A tensor in which each scalar is a "rate" parameter describing the -// associated poisson distribution. +// `[min; max]` define the clamping range for the `inputs` data. +// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` +// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and +// then de-quantized and output as floats in `[min; max]` interval. +// `num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive. // -// Returns A tensor with shape `shape + shape(rate)`. Each slice -// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for -// `rate[i0, i1, ...iN]`. -func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) { +// This operation has a gradient and thus allows for training `min` and `max` +// values. +func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { if scope.Err() != nil { return } @@ -20300,9 +19645,9 @@ func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ... 
a(attrs) } opspec := tf.OpSpec{ - Type: "RandomPoissonV2", + Type: "FakeQuantWithMinMaxVars", Input: []tf.Input{ - shape, rate, + inputs, min, max, }, Attrs: attrs, } @@ -20310,104 +19655,105 @@ func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ... return op.Output(0) } -// DecodeAndCropJpegAttr is an optional argument to DecodeAndCropJpeg. -type DecodeAndCropJpegAttr func(optionalAttr) +// RetrieveTPUEmbeddingMomentumParametersAttr is an optional argument to RetrieveTPUEmbeddingMomentumParameters. +type RetrieveTPUEmbeddingMomentumParametersAttr func(optionalAttr) -// DecodeAndCropJpegChannels sets the optional channels attribute to value. +// RetrieveTPUEmbeddingMomentumParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodeAndCropJpegChannels(value int64) DecodeAndCropJpegAttr { +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingMomentumParametersTableId(value int64) RetrieveTPUEmbeddingMomentumParametersAttr { return func(m optionalAttr) { - m["channels"] = value + m["table_id"] = value } } -// DecodeAndCropJpegRatio sets the optional ratio attribute to value. -// -// value: Downscaling ratio. -// If not specified, defaults to 1 -func DecodeAndCropJpegRatio(value int64) DecodeAndCropJpegAttr { +// RetrieveTPUEmbeddingMomentumParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingMomentumParametersTableName(value string) RetrieveTPUEmbeddingMomentumParametersAttr { return func(m optionalAttr) { - m["ratio"] = value + m["table_name"] = value } } -// DecodeAndCropJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. +// Retrieve Momentum embedding parameters. // -// value: If true use a slower but nicer upscaling of the -// chroma planes (yuv420/422 only). 
-// If not specified, defaults to true -func DecodeAndCropJpegFancyUpscaling(value bool) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["fancy_upscaling"] = value +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns Parameter parameters updated by the Momentum optimization algorithm.Parameter momenta updated by the Momentum optimization algorithm. +func RetrieveTPUEmbeddingMomentumParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMomentumParametersAttr) (parameters tf.Output, momenta tf.Output) { + if scope.Err() != nil { + return } -} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RetrieveTPUEmbeddingMomentumParameters", -// DecodeAndCropJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. -// -// value: If true try to recover an image from truncated input. -// If not specified, defaults to false -func DecodeAndCropJpegTryRecoverTruncated(value bool) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["try_recover_truncated"] = value + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// DecodeAndCropJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. +// Forwards the value of an available tensor from `inputs` to `output`. // -// value: The minimum required fraction of lines before a truncated -// input is accepted. 
-// If not specified, defaults to 1 -func DecodeAndCropJpegAcceptableFraction(value float32) DecodeAndCropJpegAttr { - return func(m optionalAttr) { - m["acceptable_fraction"] = value +// `Merge` waits for at least one of the tensors in `inputs` to become available. +// It is usually combined with `Switch` to implement branching. +// +// `Merge` forwards the first tensor to become available to `output`, and sets +// `value_index` to its index in `inputs`. +// +// Arguments: +// inputs: The input tensors, exactly one of which will become available. +// +// Returns Will be set to the available input tensor.The index of the chosen input tensor in `inputs`. +func Merge(scope *Scope, inputs []tf.Output) (output tf.Output, value_index tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Merge", + Input: []tf.Input{ + tf.OutputList(inputs), + }, } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// DecodeAndCropJpegDctMethod sets the optional dct_method attribute to value. +// QueueCloseV2Attr is an optional argument to QueueCloseV2. +type QueueCloseV2Attr func(optionalAttr) + +// QueueCloseV2CancelPendingEnqueues sets the optional cancel_pending_enqueues attribute to value. // -// value: string specifying a hint about the algorithm used for -// decompression. Defaults to "" which maps to a system-specific -// default. Currently valid values are ["INTEGER_FAST", -// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal -// jpeg library changes to a version that does not have that specific -// option.) -// If not specified, defaults to "" -func DecodeAndCropJpegDctMethod(value string) DecodeAndCropJpegAttr { +// value: If true, all pending enqueue requests that are +// blocked on the given queue will be canceled. 
+// If not specified, defaults to false +func QueueCloseV2CancelPendingEnqueues(value bool) QueueCloseV2Attr { return func(m optionalAttr) { - m["dct_method"] = value + m["cancel_pending_enqueues"] = value } } -// Decode and Crop a JPEG-encoded image to a uint8 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the JPEG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// -// If needed, the JPEG-encoded image is transformed to match the requested number -// of color channels. -// -// The attr `ratio` allows downscaling the image by an integer factor during -// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than -// downscaling the image later. -// +// Closes the given queue. // -// It is equivalent to a combination of decode and crop, but much faster by only -// decoding partial jpeg image. +// This operation signals that no more elements will be enqueued in the +// given queue. Subsequent Enqueue(Many) operations will fail. +// Subsequent Dequeue(Many) operations will continue to succeed if +// sufficient elements remain in the queue. Subsequent Dequeue(Many) +// operations that would block will fail immediately. // // Arguments: -// contents: 0-D. The JPEG-encoded image. -// crop_window: 1-D. The crop window: [crop_y, crop_x, crop_height, crop_width]. +// handle: The handle to a queue. // -// Returns 3-D with shape `[height, width, channels]`.. -func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, optional ...DecodeAndCropJpegAttr) (image tf.Output) { +// Returns the created operation. 
+func QueueCloseV2(scope *Scope, handle tf.Output, optional ...QueueCloseV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -20416,75 +19762,67 @@ func DecodeAndCropJpeg(scope *Scope, contents tf.Output, crop_window tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeAndCropJpeg", + Type: "QueueCloseV2", Input: []tf.Input{ - contents, crop_window, + handle, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Adds two `SparseTensor` objects to produce another `SparseTensor`. -// -// The input `SparseTensor` objects' indices are assumed ordered in standard -// lexicographic order. If this is not the case, before this step run -// `SparseReorder` to restore index ordering. -// -// By default, if two values sum to zero at some index, the output `SparseTensor` -// would still include that particular location in its index, storing a zero in the -// corresponding value slot. To override this, callers can specify `thresh`, -// indicating that if the sum has a magnitude strictly smaller than `thresh`, its -// corresponding value and index would then not be included. In particular, -// `thresh == 0` (default) means everything is kept and actual thresholding happens -// only for a positive value. -// -// In the following shapes, `nnz` is the count after taking `thresh` into account. +// Writes the given dataset to the given file using the TFRecord format. // // Arguments: -// a_indices: 2-D. The `indices` of the first `SparseTensor`, size `[nnz, ndims]` Matrix. -// a_values: 1-D. The `values` of the first `SparseTensor`, size `[nnz]` Vector. -// a_shape: 1-D. The `shape` of the first `SparseTensor`, size `[ndims]` Vector. -// b_indices: 2-D. The `indices` of the second `SparseTensor`, size `[nnz, ndims]` Matrix. -// b_values: 1-D. The `values` of the second `SparseTensor`, size `[nnz]` Vector. -// b_shape: 1-D. The `shape` of the second `SparseTensor`, size `[ndims]` Vector. -// thresh: 0-D. 
The magnitude threshold that determines if an output value/index -// pair takes space. -func SparseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output, thresh tf.Output) (sum_indices tf.Output, sum_values tf.Output, sum_shape tf.Output) { +// input_dataset: A variant tensor representing the dataset to write. +// filename: A scalar string tensor representing the filename to use. +// compression_type: A scalar string tensor containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// +// Returns the created operation. +func ExperimentalDatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseAdd", + Type: "ExperimentalDatasetToTFRecord", Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh, + input_dataset, filename, compression_type, }, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// QuantizedRelu6Attr is an optional argument to QuantizedRelu6. -type QuantizedRelu6Attr func(optionalAttr) +// BiasAddGradAttr is an optional argument to BiasAddGrad. +type BiasAddGradAttr func(optionalAttr) -// QuantizedRelu6OutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedRelu6OutType(value tf.DataType) QuantizedRelu6Attr { +// BiasAddGradDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the bias tensor will be added to the last dimension +// of the value tensor. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. 
+// The tensor will be added to "in_channels", the third-to-the-last +// dimension. +// If not specified, defaults to "NHWC" +func BiasAddGradDataFormat(value string) BiasAddGradAttr { return func(m optionalAttr) { - m["out_type"] = value + m["data_format"] = value } } -// Computes Quantized Rectified Linear 6: `min(max(features, 0), 6)` +// The backward operation for "BiasAdd" on the "bias" tensor. // -// Arguments: +// It accumulates all the values from out_backprop into the feature dimension. +// For NHWC data format, the feature dimension is the last. For NCHW data format, +// the feature dimension is the third-to-last. // -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. +// Arguments: +// out_backprop: Any number of dimensions. // -// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. -func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedRelu6Attr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { +// Returns 1-D with size the feature dimension of `out_backprop`. +func BiasAddGrad(scope *Scope, out_backprop tf.Output, optional ...BiasAddGradAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -20493,312 +19831,439 @@ func QuantizedRelu6(scope *Scope, features tf.Output, min_features tf.Output, ma a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedRelu6", + Type: "BiasAddGrad", Input: []tf.Input{ - features, min_features, max_features, + out_backprop, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// FixedLengthRecordReaderV2Attr is an optional argument to FixedLengthRecordReaderV2. 
-type FixedLengthRecordReaderV2Attr func(optionalAttr) - -// FixedLengthRecordReaderV2HeaderBytes sets the optional header_bytes attribute to value. +// Reduces `input` from `num_devices` using `reduction` to a single device. // -// value: Number of bytes in the header, defaults to 0. -// If not specified, defaults to 0 -func FixedLengthRecordReaderV2HeaderBytes(value int64) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["header_bytes"] = value - } -} - -// FixedLengthRecordReaderV2FooterBytes sets the optional footer_bytes attribute to value. +// Reduces `input` from `num_devices` using `reduction` to a single device. // -// value: Number of bytes in the footer, defaults to 0. -// If not specified, defaults to 0 -func FixedLengthRecordReaderV2FooterBytes(value int64) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["footer_bytes"] = value +// The graph should be constructed so that all inputs have a valid device +// assignment, and the op itself is assigned one of these devices. +// +// input: The input to the reduction. +// data: the value of the reduction across all `num_devices` devices. +// reduction: the reduction operation to perform. +func NcclReduce(scope *Scope, input []tf.Output, reduction string) (data tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"reduction": reduction} + opspec := tf.OpSpec{ + Type: "NcclReduce", + Input: []tf.Input{ + tf.OutputList(input), + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// FixedLengthRecordReaderV2HopBytes sets the optional hop_bytes attribute to value. +// Computes the gradient of morphological 2-D dilation with respect to the input. // -// value: Number of bytes to hop before each read. Default of 0 means using -// record_bytes. 
-// If not specified, defaults to 0 -func FixedLengthRecordReaderV2HopBytes(value int64) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["hop_bytes"] = value +// Arguments: +// input: 4-D with shape `[batch, in_height, in_width, depth]`. +// filter: 3-D with shape `[filter_height, filter_width, depth]`. +// out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`. +// strides: 1-D of length 4. The stride of the sliding window for each dimension of +// the input tensor. Must be: `[1, stride_height, stride_width, 1]`. +// rates: 1-D of length 4. The input stride for atrous morphological dilation. +// Must be: `[1, rate_height, rate_width, 1]`. +// padding: The type of padding algorithm to use. +// +// Returns 4-D with shape `[batch, in_height, in_width, depth]`. +func Dilation2DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (in_backprop tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} + opspec := tf.OpSpec{ + Type: "Dilation2DBackpropInput", + Input: []tf.Input{ + input, filter, out_backprop, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// FixedLengthRecordReaderV2Container sets the optional container attribute to value. +// An Op to sum inputs across replicated TPU instances. // -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func FixedLengthRecordReaderV2Container(value string) FixedLengthRecordReaderV2Attr { - return func(m optionalAttr) { - m["container"] = value +// Each instance supplies its own input. +// +// For example, suppose there are 8 TPU instances: `[A, B, C, D, E, F, G, H]`. +// Passing group_assignment=`[[0,2,4,6],[1,3,5,7]]` sets `A, C, E, G` as group 0, +// and `B, D, F, H` as group 1. 
Thus we get the outputs: +// `[A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H]`. +// +// Arguments: +// input: The local input to the sum. +// group_assignment: An int32 tensor with shape +// [num_groups, num_replicas_per_group]. `group_assignment[i]` represents the +// replica ids in the ith subgroup. +// +// Returns The sum of all the distributed inputs. +func CrossReplicaSum(scope *Scope, input tf.Output, group_assignment tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "CrossReplicaSum", + Input: []tf.Input{ + input, group_assignment, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// FixedLengthRecordReaderV2SharedName sets the optional shared_name attribute to value. +// ResourceSparseApplyMomentumAttr is an optional argument to ResourceSparseApplyMomentum. +type ResourceSparseApplyMomentumAttr func(optionalAttr) + +// ResourceSparseApplyMomentumUseLocking sets the optional use_locking attribute to value. // -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func FixedLengthRecordReaderV2SharedName(value string) FixedLengthRecordReaderV2Attr { +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyMomentumUseLocking(value bool) ResourceSparseApplyMomentumAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["use_locking"] = value } } -// FixedLengthRecordReaderV2Encoding sets the optional encoding attribute to value. +// ResourceSparseApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. // -// value: The type of encoding for the file. Currently ZLIB and GZIP -// are supported. Defaults to none. 
-// If not specified, defaults to "" -func FixedLengthRecordReaderV2Encoding(value string) FixedLengthRecordReaderV2Attr { +// value: If `True`, the tensor passed to compute grad will be +// var - lr * momentum * accum, so in the end, the var you get is actually +// var - lr * momentum * accum. +// If not specified, defaults to false +func ResourceSparseApplyMomentumUseNesterov(value bool) ResourceSparseApplyMomentumAttr { return func(m optionalAttr) { - m["encoding"] = value + m["use_nesterov"] = value } } -// A Reader that outputs fixed-length records from a file. +// Update relevant entries in '*var' and '*accum' according to the momentum scheme. +// +// Set use_nesterov = True if you want to use Nesterov momentum. +// +// That is for rows we have grad for, we update var and accum as follows: +// +// accum = accum * momentum + grad +// var -= lr * accum // // Arguments: -// record_bytes: Number of bytes in the record. +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Learning rate. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// momentum: Momentum. Must be a scalar. // -// Returns The handle to reference the Reader. -func FixedLengthRecordReaderV2(scope *Scope, record_bytes int64, optional ...FixedLengthRecordReaderV2Attr) (reader_handle tf.Output) { +// Returns the created operation. 
+func ResourceSparseApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, indices tf.Output, momentum tf.Output, optional ...ResourceSparseApplyMomentumAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"record_bytes": record_bytes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FixedLengthRecordReaderV2", - + Type: "ResourceSparseApplyMomentum", + Input: []tf.Input{ + var_, accum, lr, grad, indices, momentum, + }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Return a tensor with the same shape and contents as the input tensor or value. -func Identity(scope *Scope, input tf.Output) (output tf.Output) { +// An Op to permute tensors across replicated TPU instances. +// +// Each instance supplies its own input. +// +// For example, suppose there are 4 TPU instances: `[A, B, C, D]`. Passing +// source_target_pairs=`[[0,1],[1,2],[2,3],[3,0]]` gets the outputs: +// `[D, A, B, C]`. +// +// Arguments: +// input: The local input to be permuted. Currently only supports float and +// bfloat16. +// source_target_pairs: A tensor with shape [num_pairs, 2]. +// +// Returns The permuted input. +func CollectivePermute(scope *Scope, input tf.Output, source_target_pairs tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Identity", + Type: "CollectivePermute", Input: []tf.Input{ - input, + input, source_target_pairs, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes arctangent of `y/x` element-wise, respecting signs of the arguments. +// Returns the complex conjugate of a complex number. // -// This is the angle \( \theta \in [-\pi, \pi] \) such that -// \[ x = r \cos(\theta) \] -// and -// \[ y = r \sin(\theta) \] -// where \(r = \sqrt(x^2 + y^2) \). 
-func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) { +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// complex numbers that are the complex conjugate of each element in `input`. The +// complex numbers in `input` must be of the form \\(a + bj\\), where *a* is the +// real part and *b* is the imaginary part. +// +// The complex conjugate returned by this operation is of the form \\(a - bj\\). +// +// For example: +// +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j] +// ``` +func Conj(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Atan2", + Type: "Conj", Input: []tf.Input{ - y, x, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// AudioSummaryAttr is an optional argument to AudioSummary. -type AudioSummaryAttr func(optionalAttr) +// RetrieveTPUEmbeddingCenteredRMSPropParametersAttr is an optional argument to RetrieveTPUEmbeddingCenteredRMSPropParameters. +type RetrieveTPUEmbeddingCenteredRMSPropParametersAttr func(optionalAttr) -// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value. +// RetrieveTPUEmbeddingCenteredRMSPropParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingCenteredRMSPropParametersTableId(value int64) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// RetrieveTPUEmbeddingCenteredRMSPropParametersTableName sets the optional table_name attribute to value. 
+// If not specified, defaults to "" +func RetrieveTPUEmbeddingCenteredRMSPropParametersTableName(value string) RetrieveTPUEmbeddingCenteredRMSPropParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Retrieve centered RMSProp embedding parameters. // -// REQUIRES: value >= 1 -func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr { +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns Parameter parameters updated by the centered RMSProp optimization algorithm.Parameter ms updated by the centered RMSProp optimization algorithm.Parameter mom updated by the centered RMSProp optimization algorithm.Parameter mg updated by the centered RMSProp optimization algorithm. +func RetrieveTPUEmbeddingCenteredRMSPropParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingCenteredRMSPropParametersAttr) (parameters tf.Output, ms tf.Output, mom tf.Output, mg tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RetrieveTPUEmbeddingCenteredRMSPropParameters", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) +} + +// StringSplitAttr is an optional argument to StringSplit. +type StringSplitAttr func(optionalAttr) + +// StringSplitSkipEmpty sets the optional skip_empty attribute to value. +// +// value: A `bool`. If `True`, skip the empty strings from the result. 
+// If not specified, defaults to true +func StringSplitSkipEmpty(value bool) StringSplitAttr { return func(m optionalAttr) { - m["max_outputs"] = value + m["skip_empty"] = value } } -// Outputs a `Summary` protocol buffer with audio. +// Split elements of `input` based on `delimiter` into a `SparseTensor`. // -// DEPRECATED at GraphDef version 15: Use AudioSummaryV2. +// Let N be the size of source (typically N will be the batch size). Split each +// element of `input` based on `delimiter` and return a `SparseTensor` +// containing the splitted tokens. Empty tokens are ignored. // -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. +// `delimiter` can be empty, or a string of split characters. If `delimiter` is an +// empty string, each element of `input` is split into individual single-byte +// character strings, including splitting of UTF-8 multibyte sequences. Otherwise +// every character of `delimiter` is a potential split point. // -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: +// For example: +// N = 2, input[0] is 'hello world' and input[1] is 'a b c', then the output +// will be // -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// indices = [0, 0; +// 0, 1; +// 1, 0; +// 1, 1; +// 1, 2] +// shape = [2, 3] +// values = ['hello', 'world', 'a', 'b', 'c'] // // Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. +// input: 1-D. 
Strings to split. +// delimiter: 0-D. Delimiter characters (bytes), or empty string. // -// Returns Scalar. Serialized `Summary` protocol buffer. -func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) { +// Returns A dense matrix of int64 representing the indices of the sparse tensor.A vector of strings corresponding to the splited values.a length-2 vector of int64 representing the shape of the sparse +// tensor, where the first value is N and the second value is the maximum number +// of tokens in a single input entry. +func StringSplit(scope *Scope, input tf.Output, delimiter tf.Output, optional ...StringSplitAttr) (indices tf.Output, values tf.Output, shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"sample_rate": sample_rate} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "AudioSummary", + Type: "StringSplit", Input: []tf.Input{ - tag, tensor, + input, delimiter, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// QrAttr is an optional argument to Qr. -type QrAttr func(optionalAttr) +// MaxPool3DAttr is an optional argument to MaxPool3D. +type MaxPool3DAttr func(optionalAttr) -// QrFullMatrices sets the optional full_matrices attribute to value. +// MaxPool3DDataFormat sets the optional data_format attribute to value. // -// value: If true, compute full-sized `q` and `r`. If false -// (the default), compute only the leading `P` columns of `q`. -// If not specified, defaults to false -func QrFullMatrices(value bool) QrAttr { +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. 
+// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func MaxPool3DDataFormat(value string) MaxPool3DAttr { return func(m optionalAttr) { - m["full_matrices"] = value + m["data_format"] = value } } -// Computes the QR decompositions of one or more matrices. -// -// Computes the QR decomposition of each inner matrix in `tensor` such that -// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])` -// -// ```python -// # a is a tensor. -// # q is a tensor of orthonormal matrices. -// # r is a tensor of upper triangular matrices. -// q, r = qr(a) -// q_full, r_full = qr(a, full_matrices=True) -// ``` +// Performs 3D max pooling on the input. // // Arguments: -// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. +// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. // -// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then -// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is -// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is -// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`. -func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) { +// Returns The max pooled output tensor. 
+func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Qr", + Type: "MaxPool3D", Input: []tf.Input{ input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Check if the input matches the regex pattern. -// -// The input is a string tensor of any shape. The pattern is the -// regular expression to be matched with every element of the input tensor. -// The boolean values (True or False) of the output tensor indicate -// if the input matches the regex pattern provided. +// Convert JSON-encoded Example records to binary protocol buffer strings. // -// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) +// This op translates a tensor containing Example records, encoded using +// the [standard JSON +// mapping](https://developers.google.com/protocol-buffers/docs/proto3#json), +// into a tensor containing the same records encoded as binary protocol +// buffers. The resulting tensor can then be fed to any of the other +// Example-parsing ops. // // Arguments: -// input: A string tensor of the text to be processed. -// pattern: The regular expression to match the input. +// json_examples: Each string is a JSON object serialized according to the JSON +// mapping of the Example proto. // -// Returns A bool tensor with the same shape as `input`. -func StaticRegexFullMatch(scope *Scope, input tf.Output, pattern string) (output tf.Output) { +// Returns Each string is a binary Example protocol buffer corresponding +// to the respective element of `json_examples`. 
+func DecodeJSONExample(scope *Scope, json_examples tf.Output) (binary_examples tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"pattern": pattern} opspec := tf.OpSpec{ - Type: "StaticRegexFullMatch", + Type: "DecodeJSONExample", Input: []tf.Input{ - input, + json_examples, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent. -type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr) +// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2. +type QueueEnqueueManyV2Attr func(optionalAttr) -// ResourceSparseApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. +// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value. // -// value: If True, the subtraction will be protected by a lock; -// otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceSparseApplyProximalGradientDescentUseLocking(value bool) ResourceSparseApplyProximalGradientDescentAttr { +// value: If the queue is too full, this operation will block for up +// to timeout_ms milliseconds. +// Note: This option is not supported yet. +// If not specified, defaults to -1 +func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr { return func(m optionalAttr) { - m["use_locking"] = value + m["timeout_ms"] = value } } -// Sparse update '*var' as FOBOS algorithm with fixed learning rate. +// Enqueues zero or more tuples of one or more tensors in the given queue. // -// That is for rows we have grad for, we update var as follows: -// prox_v = var - alpha * grad -// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} +// This operation slices each component tensor along the 0th dimension to +// make multiple queue elements. 
All of the tuple components must have the +// same size in the 0th dimension. +// +// The components input has k elements, which correspond to the components of +// tuples stored in the given queue. +// +// N.B. If the queue is full, this operation will block until the given +// elements have been enqueued (or 'timeout_ms' elapses, if specified). // // Arguments: -// var_: Should be from a Variable(). -// alpha: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// grad: The gradient. -// indices: A vector of indices into the first dimension of var and accum. +// handle: The handle to a queue. +// components: One or more tensors from which the enqueued tensors should +// be taken. // // Returns the created operation. -func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalGradientDescentAttr) (o *tf.Operation) { +func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -20807,96 +20272,37 @@ func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, al a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceSparseApplyProximalGradientDescent", + Type: "QueueEnqueueManyV2", Input: []tf.Input{ - var_, alpha, l1, l2, grad, indices, + handle, tf.OutputList(components), }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Real-valued fast Fourier transform. -// -// Computes the 1-dimensional discrete Fourier transform of a real-valued signal -// over the inner-most dimension of `input`. +// PrintV2Attr is an optional argument to PrintV2. +type PrintV2Attr func(optionalAttr) + +// PrintV2OutputStream sets the optional output_stream attribute to value. 
// -// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the -// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, -// followed by the `fft_length / 2` positive-frequency terms. +// value: A string specifying the output stream or logging level to print to. +// If not specified, defaults to "stderr" +func PrintV2OutputStream(value string) PrintV2Attr { + return func(m optionalAttr) { + m["output_stream"] = value + } +} + +// Prints a string scalar. // -// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// Prints a string scalar to the desired output_stream. // // Arguments: -// input: A float32 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. -// -// Returns A complex64 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length / 2 + 1` unique -// frequency components of its 1D Fourier transform. -// -// @compatibility(numpy) -// Equivalent to np.fft.rfft -// @end_compatibility -func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "RFFT", - Input: []tf.Input{ - input, fft_length, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Adds a value to the current value of a variable. -// -// Any ReadVariableOp with a control dependency on this op is guaranteed to -// see the incremented value or a subsequent newer one. -// -// Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value by which the variable will be incremented. +// input: The string scalar to print. // // Returns the created operation. 
-func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AssignAddVariableOp", - Input: []tf.Input{ - resource, value, - }, - } - return scope.AddOperation(opspec) -} - -// QuantizedReluAttr is an optional argument to QuantizedRelu. -type QuantizedReluAttr func(optionalAttr) - -// QuantizedReluOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedReluOutType(value tf.DataType) QuantizedReluAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Computes Quantized Rectified Linear: `max(features, 0)` -// -// Arguments: -// -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. -// -// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. -func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { +func PrintV2(scope *Scope, input tf.Output, optional ...PrintV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -20905,96 +20311,98 @@ func QuantizedRelu(scope *Scope, features tf.Output, min_features tf.Output, max a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedRelu", + Type: "PrintV2", Input: []tf.Input{ - features, min_features, max_features, + input, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// Reorders a SparseTensor into the canonical, row-major ordering. -// -// Note that by convention, all sparse ops preserve the canonical ordering along -// increasing dimension number. 
The only time ordering can be violated is during -// manual manipulation of the indices and values vectors to add entries. -// -// Reordering does not affect the shape of the SparseTensor. +// The gradient operator for the SparseSlice op. // -// If the tensor has rank `R` and `N` non-empty values, `input_indices` has -// shape `[N, R]`, input_values has length `N`, and input_shape has length `R`. +// This op takes in the upstream gradient w.r.t. non-empty values of +// the sliced `SparseTensor`, and outputs the gradients w.r.t. +// the non-empty values of input `SparseTensor`. // // Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. +// backprop_val_grad: 1-D. The gradient with respect to +// the non-empty values of the sliced `SparseTensor`. +// input_indices: 2-D. The `indices` of the input `SparseTensor`. +// input_start: 1-D. tensor represents the start of the slice. +// output_indices: 2-D. The `indices` of the sliced `SparseTensor`. // -// Returns 2-D. `N x R` matrix with the same indices as input_indices, but -// in canonical row-major ordering.1-D. `N` non-empty values corresponding to `output_indices`. -func SparseReorder(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { +// Returns 1-D. The gradient with respect to the non-empty values of input `SparseTensor`. 
+func SparseSliceGrad(scope *Scope, backprop_val_grad tf.Output, input_indices tf.Output, input_start tf.Output, output_indices tf.Output) (val_grad tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseReorder", + Type: "SparseSliceGrad", Input: []tf.Input{ - input_indices, input_values, input_shape, + backprop_val_grad, input_indices, input_start, output_indices, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Computes rectified linear: `max(features, 0)`. -func Relu(scope *Scope, features tf.Output) (activations tf.Output) { +// Creates a dataset by applying optimizations to `input_dataset`. +// +// Creates a dataset by applying optimizations to `input_dataset`. +// +// Arguments: +// input_dataset: A variant tensor representing the input dataset. +// optimizations: A `tf.string` vector `tf.Tensor` identifying optimizations to use. +// +// +func OptimizeDataset(scope *Scope, input_dataset tf.Output, optimizations tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "Relu", + Type: "OptimizeDataset", Input: []tf.Input{ - features, + input_dataset, optimizations, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyAddSignAttr is an optional argument to ResourceApplyAddSign. -type ResourceApplyAddSignAttr func(optionalAttr) +// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad. +type ResourceApplyProximalAdagradAttr func(optionalAttr) -// ResourceApplyAddSignUseLocking sets the optional use_locking attribute to value. +// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. 
// -// value: If `True`, updating of the var and m tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. // If not specified, defaults to false -func ResourceApplyAddSignUseLocking(value bool) ResourceApplyAddSignAttr { +func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' according to the AddSign update. +// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate. // -// m_t <- beta1 * m_{t-1} + (1 - beta1) * g -// update <- (alpha + sign_decay * sign(g) *sign(m)) * g -// variable <- variable - lr_t * update +// accum += grad * grad +// prox_v = var - lr * grad * (1 / sqrt(accum)) +// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} // // Arguments: // var_: Should be from a Variable(). -// m: Should be from a Variable(). +// accum: Should be from a Variable(). // lr: Scaling factor. Must be a scalar. -// alpha: Must be a scalar. -// sign_decay: Must be a scalar. -// beta: Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. // grad: The gradient. // // Returns the created operation. 
-func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, alpha tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyAddSignAttr) (o *tf.Operation) { +func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -21003,263 +20411,224 @@ func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Outpu a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAddSign", + Type: "ResourceApplyProximalAdagrad", Input: []tf.Input{ - var_, m, lr, alpha, sign_decay, beta, grad, + var_, accum, lr, l1, l2, grad, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Component-wise divides a SparseTensor by a dense Tensor. -// -// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not -// the other direction. -// -// Arguments: -// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// dense: `R`-D. The dense Tensor operand. +// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2. +type MutableHashTableOfTensorsV2Attr func(optionalAttr) + +// MutableHashTableOfTensorsV2Container sets the optional container attribute to value. // -// Returns 1-D. The `N` values that are operated on. -func SparseDenseCwiseDiv(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseDenseCwiseDiv", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, dense, - }, +// value: If non-empty, this table is placed in the given container. 
+// Otherwise, a default container is used. +// If not specified, defaults to "" +func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["container"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// FractionalAvgPoolGradAttr is an optional argument to FractionalAvgPoolGrad. -type FractionalAvgPoolGradAttr func(optionalAttr) - -// FractionalAvgPoolGradOverlapping sets the optional overlapping attribute to value. -// -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: -// -// `index 0 1 2 3 4` -// -// `value 20 5 16 3 7` +// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value. // -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [41/3, 26/3] for fractional avg pooling. +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. // If not specified, defaults to false -func FractionalAvgPoolGradOverlapping(value bool) FractionalAvgPoolGradAttr { +func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr { return func(m optionalAttr) { - m["overlapping"] = value + m["use_node_name_sharing"] = value } } -// Computes gradient of the FractionalAvgPool function. +// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value. 
+// If not specified, defaults to <> +func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["value_shape"] = value + } +} + +// Creates an empty hash table. // -// Unlike FractionalMaxPoolGrad, we don't need to find arg_max for -// FractionalAvgPoolGrad, we just need to evenly back-propagate each element of -// out_backprop to those indices that form the same pooling cell. Therefore, we -// just need to know the shape of original input tensor, instead of the whole -// tensor. +// This op creates a mutable hash table, specifying the type of its keys and +// values. Each value must be a vector. Data can be inserted into the table using +// the insert operations. It does not support the initialization operation. // // Arguments: -// orig_input_tensor_shape: Original input tensor shape for `fractional_avg_pool` -// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients -// w.r.t. the output of `fractional_avg_pool`. -// row_pooling_sequence: row pooling sequence, form pooling region with -// col_pooling_sequence. -// col_pooling_sequence: column pooling sequence, form pooling region with -// row_pooling sequence. +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. // -// Returns 4-D. Gradients w.r.t. the input of `fractional_avg_pool`. -func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalAvgPoolGradAttr) (output tf.Output) { +// Returns Handle to a table. 
+func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FractionalAvgPoolGrad", - Input: []tf.Input{ - orig_input_tensor_shape, out_backprop, row_pooling_sequence, col_pooling_sequence, - }, + Type: "MutableHashTableOfTensorsV2", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QuantizedConv2DAttr is an optional argument to QuantizedConv2D. -type QuantizedConv2DAttr func(optionalAttr) - -// QuantizedConv2DOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} +// ResourceApplyProximalGradientDescentAttr is an optional argument to ResourceApplyProximalGradientDescent. +type ResourceApplyProximalGradientDescentAttr func(optionalAttr) -// QuantizedConv2DDilations sets the optional dilations attribute to value. +// ResourceApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. // -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr { +// value: If True, the subtraction will be protected by a lock; +// otherwise the behavior is undefined, but may exhibit less contention. 
+// If not specified, defaults to false +func ResourceApplyProximalGradientDescentUseLocking(value bool) ResourceApplyProximalGradientDescentAttr { return func(m optionalAttr) { - m["dilations"] = value + m["use_locking"] = value } } -// Computes a 2D convolution given quantized 4D input and filter tensors. +// Update '*var' as FOBOS algorithm with fixed learning rate. // -// The inputs are quantized tensors where the lowest value represents the real -// number of the associated minimum, and the highest represents the maximum. -// This means that you can only interpret the quantized output in the same way, by -// taking the returned minimum and maximum values into account. +// prox_v = var - alpha * delta +// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} // // Arguments: +// var_: Should be from a Variable(). +// alpha: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// delta: The change. // -// filter: filter's input_depth dimension must match input's depth dimensions. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// min_filter: The float value that the lowest quantized filter value represents. -// max_filter: The float value that the highest quantized filter value represents. -// strides: The stride of the sliding window for each dimension of the input -// tensor. -// padding: The type of padding algorithm to use. -// -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. 
-func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { +// Returns the created operation. +func ResourceApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, delta tf.Output, optional ...ResourceApplyProximalGradientDescentAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedConv2D", + Type: "ResourceApplyProximalGradientDescent", Input: []tf.Input{ - input, filter, min_input, max_input, min_filter, max_filter, + var_, alpha, l1, l2, delta, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// ResourceGatherAttr is an optional argument to ResourceGather. -type ResourceGatherAttr func(optionalAttr) - -// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } + return scope.AddOperation(opspec) } -// Gather slices from the variable pointed to by `resource` according to `indices`. -// -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: -// -// ```python -// # Scalar indices -// output[:, ..., :] = params[indices, :, ... :] +// Returns 0 if the denominator is zero. // -// # Vector indices -// output[i, :, ..., :] = params[indices[i], :, ... :] // -// # Higher rank indices -// output[i, ..., j, :, ... 
:] = params[indices[i, ..., j], :, ..., :] -// ``` -func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { +// *NOTE*: `DivNoNan` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func DivNoNan(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResourceGather", + Type: "DivNoNan", Input: []tf.Input{ - resource, indices, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Delete the TensorArray from its resource container. +// Subtracts a value from the current value of a variable. // -// This enables the user to close and release the resource in the middle -// of a step/run. +// Any ReadVariableOp with a control dependency on this op is guaranteed to +// see the decremented value or a subsequent newer one. // // Arguments: -// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). +// resource: handle to the resource in which to store the variable. +// value: the value by which the variable will be incremented. // // Returns the created operation. -func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { +func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorArrayCloseV3", + Type: "AssignSubVariableOp", Input: []tf.Input{ - handle, + resource, value, }, } return scope.AddOperation(opspec) } -// StatelessMultinomialAttr is an optional argument to StatelessMultinomial. -type StatelessMultinomialAttr func(optionalAttr) +// RestoreAttr is an optional argument to Restore. 
+type RestoreAttr func(optionalAttr) -// StatelessMultinomialOutputDtype sets the optional output_dtype attribute to value. -// If not specified, defaults to DT_INT64 -func StatelessMultinomialOutputDtype(value tf.DataType) StatelessMultinomialAttr { +// RestorePreferredShard sets the optional preferred_shard attribute to value. +// +// value: Index of file to open first if multiple files match +// `file_pattern`. +// If not specified, defaults to -1 +func RestorePreferredShard(value int64) RestoreAttr { return func(m optionalAttr) { - m["output_dtype"] = value + m["preferred_shard"] = value } } -// Draws samples from a multinomial distribution. +// Restores a tensor from checkpoint files. +// +// Reads a tensor stored in one or several files. If there are several files (for +// instance because a tensor was saved as slices), `file_pattern` may contain +// wildcard symbols (`*` and `?`) in the filename portion only, not in the +// directory portion. +// +// If a `file_pattern` matches several files, `preferred_shard` can be used to hint +// in which file the requested tensor is likely to be found. This op will first +// open the file at index `preferred_shard` in the list of matching files and try +// to restore tensors from that file. Only if some tensors or tensor slices are +// not found in that first file, then the Op opens all the files. Setting +// `preferred_shard` to match the value passed as the `shard` input +// of a matching `Save` Op may speed up Restore. This attribute only affects +// performance, not correctness. The default value -1 means files are processed in +// order. +// +// See also `RestoreSlice`. // // Arguments: -// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` -// represents the unnormalized log probabilities for all classes. -// num_samples: 0-D. Number of independent samples to draw for each row slice. -// seed: 2 seeds (shape [2]). +// file_pattern: Must have a single element. 
The pattern of the files from +// which we read the tensor. +// tensor_name: Must have a single element. The name of the tensor to be +// restored. +// dt: The type of the tensor to be restored. // -// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` -// contains the drawn class labels with range `[0, num_classes)`. -func StatelessMultinomial(scope *Scope, logits tf.Output, num_samples tf.Output, seed tf.Output, optional ...StatelessMultinomialAttr) (output tf.Output) { +// Returns The restored tensor. +func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dt": dt} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "StatelessMultinomial", + Type: "Restore", Input: []tf.Input{ - logits, num_samples, seed, + file_pattern, tensor_name, }, Attrs: attrs, } @@ -21267,59 +20636,97 @@ func StatelessMultinomial(scope *Scope, logits tf.Output, num_samples tf.Output, return op.Output(0) } -// Adds up a `SparseTensor` and a dense `Tensor`, producing a dense `Tensor`. +// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear. +type QuantizedResizeBilinearAttr func(optionalAttr) + +// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value. // -// This Op does not require `a_indices` be sorted in standard lexicographic order. +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr { + return func(m optionalAttr) { + m["align_corners"] = value + } +} + +// Resize quantized `images` to `size` using quantized bilinear interpolation. 
+// +// Input images and output images must be quantized types. // // Arguments: -// a_indices: 2-D. The `indices` of the `SparseTensor`, with shape `[nnz, ndims]`. -// a_values: 1-D. The `values` of the `SparseTensor`, with shape `[nnz]`. -// a_shape: 1-D. The `shape` of the `SparseTensor`, with shape `[ndims]`. -// b: `ndims`-D Tensor. With shape `a_shape`. -func SparseTensorDenseAdd(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b tf.Output) (output tf.Output) { +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. +// +// +// +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseTensorDenseAdd", + Type: "QuantizedResizeBilinear", Input: []tf.Input{ - a_indices, a_values, a_shape, b, + images, size, min, max, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Creates a dataset that uses a custom thread pool to compute `input_dataset`. +// +// Arguments: +// +// num_threads: Identifies the number of threads to use for the private threadpool. 
+// +// +func ExperimentalPrivateThreadPoolDataset(scope *Scope, input_dataset tf.Output, num_threads tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "ExperimentalPrivateThreadPoolDataset", + Input: []tf.Input{ + input_dataset, num_threads, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// SparseToSparseSetOperationAttr is an optional argument to SparseToSparseSetOperation. -type SparseToSparseSetOperationAttr func(optionalAttr) +// DenseToSparseSetOperationAttr is an optional argument to DenseToSparseSetOperation. +type DenseToSparseSetOperationAttr func(optionalAttr) -// SparseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. +// DenseToSparseSetOperationValidateIndices sets the optional validate_indices attribute to value. // If not specified, defaults to true -func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOperationAttr { +func DenseToSparseSetOperationValidateIndices(value bool) DenseToSparseSetOperationAttr { return func(m optionalAttr) { m["validate_indices"] = value } } -// Applies set operation along last dimension of 2 `SparseTensor` inputs. +// Applies set operation along last dimension of `Tensor` and `SparseTensor`. // // See SetOperationOp::SetOperationFromContext for values of `set_operation`. // -// If `validate_indices` is `True`, `SparseToSparseSetOperation` validates the -// order and range of `set1` and `set2` indices. -// -// Input `set1` is a `SparseTensor` represented by `set1_indices`, `set1_values`, -// and `set1_shape`. For `set1` ranked `n`, 1st `n-1` dimensions must be the same -// as `set2`. Dimension `n` contains values in a set, duplicates are allowed but -// ignored. 
-// // Input `set2` is a `SparseTensor` represented by `set2_indices`, `set2_values`, // and `set2_shape`. For `set2` ranked `n`, 1st `n-1` dimensions must be the same // as `set1`. Dimension `n` contains values in a set, duplicates are allowed but // ignored. // -// If `validate_indices` is `True`, this op validates the order and range of `set1` -// and `set2` indices. +// If `validate_indices` is `True`, this op validates the order and range of `set2` +// indices. // // Output `result` is a `SparseTensor` represented by `result_indices`, // `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this @@ -21328,26 +20735,21 @@ func SparseToSparseSetOperationValidateIndices(value bool) SparseToSparseSetOper // `[0...n-1]` dimension of `set`. // // Arguments: -// set1_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major -// order. -// set1_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major -// order. -// set1_shape: 1D `Tensor`, shape of a `SparseTensor`. `set1_shape[0...n-1]` must -// be the same as `set2_shape[0...n-1]`, `set1_shape[n]` is the -// max set size across `0...n-1` dimensions. -// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major +// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. +// Dimension `n` contains values in a set, duplicates are allowed but ignored. +// set2_indices: 2D `Tensor`, indices of a `SparseTensor`. Must be in row-major // order. // set2_values: 1D `Tensor`, values of a `SparseTensor`. Must be in row-major // order. // set2_shape: 1D `Tensor`, shape of a `SparseTensor`. `set2_shape[0...n-1]` must -// be the same as `set1_shape[0...n-1]`, `set2_shape[n]` is the -// max set size across `0...n-1` dimensions. +// be the same as the 1st `n-1` dimensions of `set1`, `result_shape[n]` is the +// max set size across `n-1` dimensions. 
// // // Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is // the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` // is the max result set size across all `0...n-1` dimensions. -func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_values tf.Output, set1_shape tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...SparseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { +func DenseToSparseSetOperation(scope *Scope, set1 tf.Output, set2_indices tf.Output, set2_values tf.Output, set2_shape tf.Output, set_operation string, optional ...DenseToSparseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { if scope.Err() != nil { return } @@ -21356,9 +20758,9 @@ func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_value a(attrs) } opspec := tf.OpSpec{ - Type: "SparseToSparseSetOperation", + Type: "DenseToSparseSetOperation", Input: []tf.Input{ - set1_indices, set1_values, set1_shape, set2_indices, set2_values, set2_shape, + set1, set2_indices, set2_values, set2_shape, }, Attrs: attrs, } @@ -21366,157 +20768,64 @@ func SparseToSparseSetOperation(scope *Scope, set1_indices tf.Output, set1_value return op.Output(0), op.Output(1), op.Output(2) } -// MutableDenseHashTableV2Attr is an optional argument to MutableDenseHashTableV2. -type MutableDenseHashTableV2Attr func(optionalAttr) - -// MutableDenseHashTableV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. 
-// If not specified, defaults to "" -func MutableDenseHashTableV2Container(value string) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MutableDenseHashTableV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func MutableDenseHashTableV2SharedName(value string) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// MutableDenseHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// If not specified, defaults to false -func MutableDenseHashTableV2UseNodeNameSharing(value bool) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } -} - -// MutableDenseHashTableV2ValueShape sets the optional value_shape attribute to value. -// -// value: The shape of each value. -// If not specified, defaults to <> -func MutableDenseHashTableV2ValueShape(value tf.Shape) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["value_shape"] = value - } -} - -// MutableDenseHashTableV2InitialNumBuckets sets the optional initial_num_buckets attribute to value. -// -// value: The initial number of hash table buckets. Must be a power -// to 2. -// If not specified, defaults to 131072 -func MutableDenseHashTableV2InitialNumBuckets(value int64) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["initial_num_buckets"] = value - } -} - -// MutableDenseHashTableV2MaxLoadFactor sets the optional max_load_factor attribute to value. -// -// value: The maximum ratio between number of entries and number of -// buckets before growing the table. Must be between 0 and 1. 
-// If not specified, defaults to 0.8 -func MutableDenseHashTableV2MaxLoadFactor(value float32) MutableDenseHashTableV2Attr { - return func(m optionalAttr) { - m["max_load_factor"] = value - } -} - -// Creates an empty hash table that uses tensors as the backing store. +// L2 Loss. // -// It uses "open addressing" with quadratic reprobing to resolve -// collisions. +// Computes half the L2 norm of a tensor without the `sqrt`: // -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a scalar. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. +// output = sum(t ** 2) / 2 // // Arguments: -// empty_key: The key used to represent empty key buckets internally. Must not -// be used in insert or lookup operations. -// -// value_dtype: Type of the table values. +// t: Typically 2-D, but may have any dimensions. // -// Returns Handle to a table. -func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, deleted_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) { +// Returns 0-D. +func L2Loss(scope *Scope, t tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "MutableDenseHashTableV2", + Type: "L2Loss", Input: []tf.Input{ - empty_key, deleted_key, + t, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// UpperBoundAttr is an optional argument to UpperBound. -type UpperBoundAttr func(optionalAttr) +// StackV2Attr is an optional argument to StackV2. +type StackV2Attr func(optionalAttr) -// UpperBoundOutType sets the optional out_type attribute to value. 
-// If not specified, defaults to DT_INT32 -func UpperBoundOutType(value tf.DataType) UpperBoundAttr { +// StackV2StackName sets the optional stack_name attribute to value. +// +// value: Overrides the name used for the temporary stack resource. Default +// value is the name of the 'Stack' op (which is guaranteed unique). +// If not specified, defaults to "" +func StackV2StackName(value string) StackV2Attr { return func(m optionalAttr) { - m["out_type"] = value + m["stack_name"] = value } } -// Applies upper_bound(sorted_search_values, values) along each row. -// -// Each set of rows with the same index in (sorted_inputs, values) is treated -// independently. The resulting row is the equivalent of calling -// `np.searchsorted(sorted_inputs, values, side='right')`. -// -// The result is not a global index to the entire -// `Tensor`, but rather just the index in the last dimension. -// -// A 2-D example: -// sorted_sequence = [[0, 3, 9, 9, 10], -// [1, 2, 3, 4, 5]] -// values = [[2, 4, 9], -// [0, 2, 6]] -// -// result = UpperBound(sorted_sequence, values) -// -// result == [[1, 2, 4], -// [0, 2, 5]] +// A stack that produces elements in first-in last-out order. // // Arguments: -// sorted_inputs: 2-D Tensor where each row is ordered. -// values: 2-D Tensor with the same numbers of rows as `sorted_search_values`. Contains -// the values that will be searched for in `sorted_search_values`. +// max_size: The maximum size of the stack if non-negative. If negative, the stack +// size is unlimited. +// elem_type: The type of the elements on the stack. // -// Returns A `Tensor` with the same shape as `values`. It contains the last scalar index -// into the last dimension where values can be inserted without changing the -// ordered property. -func UpperBound(scope *Scope, sorted_inputs tf.Output, values tf.Output, optional ...UpperBoundAttr) (output tf.Output) { +// Returns The handle to the stack. 
+func StackV2(scope *Scope, max_size tf.Output, elem_type tf.DataType, optional ...StackV2Attr) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"elem_type": elem_type} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "UpperBound", + Type: "StackV2", Input: []tf.Input{ - sorted_inputs, values, + max_size, }, Attrs: attrs, } @@ -21524,87 +20833,99 @@ func UpperBound(scope *Scope, sorted_inputs tf.Output, values tf.Output, optiona return op.Output(0) } -// FractionalMaxPoolGradAttr is an optional argument to FractionalMaxPoolGrad. -type FractionalMaxPoolGradAttr func(optionalAttr) +// CudnnRNNBackpropAttr is an optional argument to CudnnRNNBackprop. +type CudnnRNNBackpropAttr func(optionalAttr) -// FractionalMaxPoolGradOverlapping sets the optional overlapping attribute to value. -// -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: -// -// `index 0 1 2 3 4` -// -// `value 20 5 16 3 7` -// -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [20, 16] for fractional max pooling. -// If not specified, defaults to false -func FractionalMaxPoolGradOverlapping(value bool) FractionalMaxPoolGradAttr { +// CudnnRNNBackpropRnnMode sets the optional rnn_mode attribute to value. +// If not specified, defaults to "lstm" +func CudnnRNNBackpropRnnMode(value string) CudnnRNNBackpropAttr { return func(m optionalAttr) { - m["overlapping"] = value + m["rnn_mode"] = value } } -// Computes gradient of the FractionalMaxPool function. -// -// Arguments: -// orig_input: Original input for `fractional_max_pool` -// orig_output: Original output for `fractional_max_pool` -// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients -// w.r.t. the output of `fractional_max_pool`. 
-// row_pooling_sequence: row pooling sequence, form pooling region with -// col_pooling_sequence. -// col_pooling_sequence: column pooling sequence, form pooling region with -// row_pooling sequence. -// -// Returns 4-D. Gradients w.r.t. the input of `fractional_max_pool`. -func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalMaxPoolGradAttr) (output tf.Output) { - if scope.Err() != nil { - return +// CudnnRNNBackpropInputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNBackpropInputMode(value string) CudnnRNNBackpropAttr { + return func(m optionalAttr) { + m["input_mode"] = value } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) +} + +// CudnnRNNBackpropDirection sets the optional direction attribute to value. +// If not specified, defaults to "unidirectional" +func CudnnRNNBackpropDirection(value string) CudnnRNNBackpropAttr { + return func(m optionalAttr) { + m["direction"] = value } - opspec := tf.OpSpec{ - Type: "FractionalMaxPoolGrad", - Input: []tf.Input{ - orig_input, orig_output, out_backprop, row_pooling_sequence, col_pooling_sequence, - }, - Attrs: attrs, +} + +// CudnnRNNBackpropDropout sets the optional dropout attribute to value. +// If not specified, defaults to 0 +func CudnnRNNBackpropDropout(value float32) CudnnRNNBackpropAttr { + return func(m optionalAttr) { + m["dropout"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// ResourceApplyAdagradDAAttr is an optional argument to ResourceApplyAdagradDA. -type ResourceApplyAdagradDAAttr func(optionalAttr) +// CudnnRNNBackpropSeed sets the optional seed attribute to value. 
+// If not specified, defaults to 0 +func CudnnRNNBackpropSeed(value int64) CudnnRNNBackpropAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} -// ResourceApplyAdagradDAUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyAdagradDAUseLocking(value bool) ResourceApplyAdagradDAAttr { +// CudnnRNNBackpropSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNBackpropSeed2(value int64) CudnnRNNBackpropAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["seed2"] = value } } -// Update '*var' according to the proximal adagrad scheme. +// Backprop step of CudnnRNN. // -// Arguments: -// var_: Should be from a Variable(). -// gradient_accumulator: Should be from a Variable(). -// gradient_squared_accumulator: Should be from a Variable(). -// grad: The gradient. -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// global_step: Training step number. Must be a scalar. +// Compute the backprop of both data and weights in a RNN. // -// Returns the created operation. -func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceApplyAdagradDAAttr) (o *tf.Operation) { +// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicate whether there is a linear projection between the input and +// the actual computation before the first layer. 'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. 
+// direction: Indicates whether a bidirectional model will be used. Should be +// "unidirectional" or "bidirectional". +// dropout: Dropout probability. When set to 0., dropout is disabled. +// seed: The 1st part of a seed to initialize dropout. +// seed2: The 2nd part of a seed to initialize dropout. +// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size]. +// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size, +// num_units]. +// input_c: For LSTM, a 3-D tensor with the shape of +// [num_layer * dir, batch, num_units]. For other models, it is ignored. +// params: A 1-D tensor that contains the weights and biases in an opaque layout. +// The size must be created through CudnnRNNParamsSize, and initialized +// separately. Note that they might not be compatible across different +// generations. So it is a good idea to save and restore +// output: A 3-D tensor with the shape of [seq_length, batch_size, +// dir * num_units]. +// output_h: The same shape has input_h. +// output_c: The same shape as input_c for LSTM. An empty tensor for other models. +// output_backprop: A 3-D tensor with the same shape as output in the forward pass. +// output_h_backprop: A 3-D tensor with the same shape as output_h in the forward +// pass. +// output_c_backprop: A 3-D tensor with the same shape as output_c in the forward +// pass. +// reserve_space: The same reserve_space produced in for forward operation. +// input_backprop: The backprop to input in the forward pass. Has the same shape +// as input. +// input_h_backprop: The backprop to input_h in the forward pass. Has the same +// shape as input_h. +// input_c_backprop: The backprop to input_c in the forward pass. Has the same +// shape as input_c. +// params_backprop: The backprop to the params buffer in the forward pass. Has the +// same shape as params. 
+func CudnnRNNBackprop(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, output tf.Output, output_h tf.Output, output_c tf.Output, output_backprop tf.Output, output_h_backprop tf.Output, output_c_backprop tf.Output, reserve_space tf.Output, optional ...CudnnRNNBackpropAttr) (input_backprop tf.Output, input_h_backprop tf.Output, input_c_backprop tf.Output, params_backprop tf.Output) { if scope.Err() != nil { return } @@ -21613,50 +20934,60 @@ func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator t a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAdagradDA", + Type: "CudnnRNNBackprop", Input: []tf.Input{ - var_, gradient_accumulator, gradient_squared_accumulator, grad, lr, l1, l2, global_step, + input, input_h, input_c, params, output, output_h, output_c, output_backprop, output_h_backprop, output_c_backprop, reserve_space, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse. -type SparseReduceMaxSparseAttr func(optionalAttr) +// InfeedEnqueueAttr is an optional argument to InfeedEnqueue. +type InfeedEnqueueAttr func(optionalAttr) -// SparseReduceMaxSparseKeepDims sets the optional keep_dims attribute to value. +// InfeedEnqueueShape sets the optional shape attribute to value. // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SparseReduceMaxSparseKeepDims(value bool) SparseReduceMaxSparseAttr { +// value: The shape of the tensor. +// If not specified, defaults to <> +func InfeedEnqueueShape(value tf.Shape) InfeedEnqueueAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["shape"] = value } } -// Computes the max of elements across dimensions of a SparseTensor. 
-// -// This Op takes a SparseTensor and is the sparse counterpart to -// `tf.reduce_max()`. In contrast to SparseReduceMax, this Op returns a -// SparseTensor. +// InfeedEnqueueLayout sets the optional layout attribute to value. // -// Reduces `sp_input` along the dimensions given in `reduction_axes`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained -// with length 1. +// value: A vector holding the requested layout in minor-to-major sequence. +// If a layout attribute is passed, but its values are all -1, the layout will +// be computed by the infeed operation. +// If not specified, defaults to <> +func InfeedEnqueueLayout(value []int64) InfeedEnqueueAttr { + return func(m optionalAttr) { + m["layout"] = value + } +} + +// InfeedEnqueueDeviceOrdinal sets the optional device_ordinal attribute to value. // -// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor -// with a single element is returned. Additionally, the axes can be negative, -// which are interpreted according to the indexing rules in Python. +// value: The TPU device to use. This should be -1 when the Op +// is running on a TPU device, and >= 0 when the Op is running on the CPU +// device. +// If not specified, defaults to -1 +func InfeedEnqueueDeviceOrdinal(value int64) InfeedEnqueueAttr { + return func(m optionalAttr) { + m["device_ordinal"] = value + } +} + +// An op which feeds a single Tensor value into the computation. // // Arguments: -// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, possibly not in canonical ordering. -// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. -// input_shape: 1-D. Shape of the input SparseTensor. -// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. 
-func SparseReduceMaxSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// input: A tensor that will be provided using the infeed mechanism. +// +// Returns the created operation. +func InfeedEnqueue(scope *Scope, input tf.Output, optional ...InfeedEnqueueAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -21665,135 +20996,98 @@ func SparseReduceMaxSparse(scope *Scope, input_indices tf.Output, input_values t a(attrs) } opspec := tf.OpSpec{ - Type: "SparseReduceMaxSparse", + Type: "InfeedEnqueue", Input: []tf.Input{ - input_indices, input_values, input_shape, reduction_axes, + input, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// Creates a dataset that emits the outputs of `input_dataset` `count` times. -// -// Arguments: -// -// count: A scalar representing the number of times that `input_dataset` should -// be repeated. A value of `-1` indicates that it should be repeated infinitely. -// +// Computes softmax cross entropy cost and gradients to backpropagate. // -func RepeatDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "RepeatDataset", - Input: []tf.Input{ - input_dataset, count, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradient for the inverse of `x` wrt its input. +// Inputs are the logits, not probabilities. // -// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` -// is the corresponding input gradient. 
-func ReciprocalGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReciprocalGrad", - Input: []tf.Input{ - y, dy, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the min of x and y (i.e. x < y ? x : y) element-wise. +// Arguments: +// features: batch_size x num_classes matrix +// labels: batch_size x num_classes matrix +// The caller must ensure that each batch of labels represents a valid +// probability distribution. // -// *NOTE*: `Minimum` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Minimum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Returns Per example loss (batch_size vector).backpropagated gradients (batch_size x num_classes matrix). +func SoftmaxCrossEntropyWithLogits(scope *Scope, features tf.Output, labels tf.Output) (loss tf.Output, backprop tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Minimum", + Type: "SoftmaxCrossEntropyWithLogits", Input: []tf.Input{ - x, y, + features, labels, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// MfccAttr is an optional argument to Mfcc. -type MfccAttr func(optionalAttr) +// ReduceJoinAttr is an optional argument to ReduceJoin. +type ReduceJoinAttr func(optionalAttr) -// MfccUpperFrequencyLimit sets the optional upper_frequency_limit attribute to value. +// ReduceJoinKeepDims sets the optional keep_dims attribute to value. // -// value: The highest frequency to use when calculating the -// ceptstrum. -// If not specified, defaults to 4000 -func MfccUpperFrequencyLimit(value float32) MfccAttr { +// value: If `True`, retain reduced dimensions with length `1`. 
+// If not specified, defaults to false +func ReduceJoinKeepDims(value bool) ReduceJoinAttr { return func(m optionalAttr) { - m["upper_frequency_limit"] = value + m["keep_dims"] = value } } -// MfccLowerFrequencyLimit sets the optional lower_frequency_limit attribute to value. +// ReduceJoinSeparator sets the optional separator attribute to value. // -// value: The lowest frequency to use when calculating the -// ceptstrum. -// If not specified, defaults to 20 -func MfccLowerFrequencyLimit(value float32) MfccAttr { +// value: The separator to use when joining. +// If not specified, defaults to "" +func ReduceJoinSeparator(value string) ReduceJoinAttr { return func(m optionalAttr) { - m["lower_frequency_limit"] = value + m["separator"] = value } } -// MfccFilterbankChannelCount sets the optional filterbank_channel_count attribute to value. +// Joins a string Tensor across the given dimensions. // -// value: Resolution of the Mel bank used internally. -// If not specified, defaults to 40 -func MfccFilterbankChannelCount(value int64) MfccAttr { - return func(m optionalAttr) { - m["filterbank_channel_count"] = value - } -} - -// MfccDctCoefficientCount sets the optional dct_coefficient_count attribute to value. +// Computes the string join across dimensions in the given string Tensor of shape +// `[\\(d_0, d_1, ..., d_{n-1}\\)]`. Returns a new Tensor created by joining the input +// strings with the given separator (default: empty string). Negative indices are +// counted backwards from the end, with `-1` being equivalent to `n - 1`. If +// indices are not specified, joins across all dimensions beginning from `n - 1` +// through `0`. // -// value: How many output channels to produce per time slice. -// If not specified, defaults to 13 -func MfccDctCoefficientCount(value int64) MfccAttr { - return func(m optionalAttr) { - m["dct_coefficient_count"] = value - } -} - -// Transforms a spectrogram into a form that's useful for speech recognition. 
+// For example: // -// Mel Frequency Cepstral Coefficients are a way of representing audio data that's -// been effective as an input feature for machine learning. They are created by -// taking the spectrum of a spectrogram (a 'cepstrum'), and discarding some of the -// higher frequencies that are less significant to the human ear. They have a long -// history in the speech recognition world, and https://en.wikipedia.org/wiki/Mel-frequency_cepstrum -// is a good resource to learn more. +// ```python +// # tensor `a` is [["a", "b"], ["c", "d"]] +// tf.reduce_join(a, 0) ==> ["ac", "bd"] +// tf.reduce_join(a, 1) ==> ["ab", "cd"] +// tf.reduce_join(a, -2) = tf.reduce_join(a, 0) ==> ["ac", "bd"] +// tf.reduce_join(a, -1) = tf.reduce_join(a, 1) ==> ["ab", "cd"] +// tf.reduce_join(a, 0, keep_dims=True) ==> [["ac", "bd"]] +// tf.reduce_join(a, 1, keep_dims=True) ==> [["ab"], ["cd"]] +// tf.reduce_join(a, 0, separator=".") ==> ["a.c", "b.d"] +// tf.reduce_join(a, [0, 1]) ==> "acbd" +// tf.reduce_join(a, [1, 0]) ==> "abcd" +// tf.reduce_join(a, []) ==> [["a", "b"], ["c", "d"]] +// tf.reduce_join(a) = tf.reduce_join(a, [1, 0]) ==> "abcd" +// ``` // // Arguments: -// spectrogram: Typically produced by the Spectrogram op, with magnitude_squared -// set to true. -// sample_rate: How many samples per second the source audio used. -func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional ...MfccAttr) (output tf.Output) { +// inputs: The input to be joined. All reduced indices must have non-zero size. +// reduction_indices: The dimensions to reduce over. Dimensions are reduced in the +// order specified. Omitting `reduction_indices` is equivalent to passing +// `[n-1, n-2, ..., 0]`. Negative indices from `-n` to `-1` are supported. +// +// Returns Has shape equal to that of the input with reduced dimensions removed or +// set to `1` depending on `keep_dims`. 
+func ReduceJoin(scope *Scope, inputs tf.Output, reduction_indices tf.Output, optional ...ReduceJoinAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -21802,9 +21096,9 @@ func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional . a(attrs) } opspec := tf.OpSpec{ - Type: "Mfcc", + Type: "ReduceJoin", Input: []tf.Input{ - spectrogram, sample_rate, + inputs, reduction_indices, }, Attrs: attrs, } @@ -21812,286 +21106,388 @@ func Mfcc(scope *Scope, spectrogram tf.Output, sample_rate tf.Output, optional . return op.Output(0) } -// Returns the element-wise sum of a list of tensors. -// -// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not -// wait for all of its inputs to be ready before beginning to sum. This can -// save memory if inputs are ready at different times, since minimum temporary -// storage is proportional to the output size rather than the inputs size. +// TopKAttr is an optional argument to TopK. +type TopKAttr func(optionalAttr) + +// TopKSorted sets the optional sorted attribute to value. // -// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. +// value: If true the resulting `k` elements will be sorted by the values in +// descending order. +// If not specified, defaults to true +func TopKSorted(value bool) TopKAttr { + return func(m optionalAttr) { + m["sorted"] = value + } +} + +// Finds values and indices of the `k` largest elements for the last dimension. // -// Returns a `Tensor` of same shape and type as the elements of `inputs`. +// DEPRECATED at GraphDef version 7: Use TopKV2 instead +// +// If the input is a vector (rank-1), finds the `k` largest entries in the vector +// and outputs their values and indices as vectors. Thus `values[j]` is the +// `j`-th largest entry in `input`, and its index is `indices[j]`. +// +// For matrices (resp. higher rank input), computes the top `k` entries in each +// row (resp. vector along the last dimension). 
Thus, +// +// values.shape = indices.shape = input.shape[:-1] + [k] +// +// If two elements are equal, the lower-index element appears first. +// +// If `k` varies dynamically, use `TopKV2` below. // // Arguments: -// inputs: A list of `Tensor` objects, each with same shape and type. -// shape: Shape of elements of `inputs`. -func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { +// input: 1-D or higher with last dimension at least `k`. +// k: Number of top elements to look for along the last dimension (along each +// row for matrices). +// +// Returns The `k` largest elements along each last dimensional slice.The indices of `values` within the last dimension of `input`. +func TopK(scope *Scope, input tf.Output, k int64, optional ...TopKAttr) (values tf.Output, indices tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"shape": shape} + attrs := map[string]interface{}{"k": k} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "AccumulateNV2", + Type: "TopK", Input: []tf.Input{ - tf.OutputList(inputs), + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Outputs deterministic pseudorandom random integers from a uniform distribution. -// -// The generated values follow a uniform distribution in the range `[minval, maxval)`. +// BatchToSpace for N-D tensors of type T. // -// The outputs are a deterministic function of `shape`, `seed`, `minval`, and `maxval`. +// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape +// `block_shape + [batch]`, interleaves these blocks back into the grid defined by +// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as +// the input. The spatial dimensions of this intermediate result are then +// optionally cropped according to `crops` to produce the output. This is the +// reverse of SpaceToBatch. 
See below for a precise description. // // Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). -// minval: Minimum value (inclusive, scalar). -// maxval: Maximum value (exclusive, scalar). +// input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, +// where spatial_shape has M dimensions. +// block_shape: 1-D with shape `[M]`, all values must be >= 1. +// crops: 2-D with shape `[M, 2]`, all values must be >= 0. +// `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input +// dimension `i + 1`, which corresponds to spatial dimension `i`. It is +// required that +// `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`. // -// Returns Random values with specified shape. -func StatelessRandomUniformInt(scope *Scope, shape tf.Output, seed tf.Output, minval tf.Output, maxval tf.Output) (output tf.Output) { +// This operation is equivalent to the following steps: +// +// 1. Reshape `input` to `reshaped` of shape: +// [block_shape[0], ..., block_shape[M-1], +// batch / prod(block_shape), +// input_shape[1], ..., input_shape[N-1]] +// +// 2. Permute dimensions of `reshaped` to produce `permuted` of shape +// [batch / prod(block_shape), +// +// input_shape[1], block_shape[0], +// ..., +// input_shape[M], block_shape[M-1], +// +// input_shape[M+1], ..., input_shape[N-1]] +// +// 3. Reshape `permuted` to produce `reshaped_permuted` of shape +// [batch / prod(block_shape), +// +// input_shape[1] * block_shape[0], +// ..., +// input_shape[M] * block_shape[M-1], +// +// input_shape[M+1], +// ..., +// input_shape[N-1]] +// +// 4. 
Crop the start and end of dimensions `[1, ..., M]` of +// `reshaped_permuted` according to `crops` to produce the output of shape: +// [batch / prod(block_shape), +// +// input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1], +// ..., +// input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1], +// +// input_shape[M+1], ..., input_shape[N-1]] +// +// Some examples: +// +// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] +// ``` +// +// The output tensor has shape `[1, 2, 2, 1]` and value: +// +// ``` +// x = [[[[1], [2]], [[3], [4]]]] +// ``` +// +// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] +// ``` +// +// The output tensor has shape `[1, 2, 2, 3]` and value: +// +// ``` +// x = [[[[1, 2, 3], [4, 5, 6]], +// [[7, 8, 9], [10, 11, 12]]]] +// ``` +// +// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [0, 0]]`: +// +// ``` +// x = [[[[1], [3]], [[9], [11]]], +// [[[2], [4]], [[10], [12]]], +// [[[5], [7]], [[13], [15]]], +// [[[6], [8]], [[14], [16]]]] +// ``` +// +// The output tensor has shape `[1, 4, 4, 1]` and value: +// +// ``` +// x = [[[1], [2], [3], [4]], +// [[5], [6], [7], [8]], +// [[9], [10], [11], [12]], +// [[13], [14], [15], [16]]] +// ``` +// +// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and +// `crops = [[0, 0], [2, 0]]`: +// +// ``` +// x = [[[[0], [1], [3]]], [[[0], [9], [11]]], +// [[[0], [2], [4]]], [[[0], [10], [12]]], +// [[[0], [5], [7]]], [[[0], [13], [15]]], +// [[[0], [6], [8]]], [[[0], [14], [16]]]] +// ``` +// +// The output tensor has shape `[2, 2, 4, 1]` and value: +// +// ``` +// x = [[[[1], [2], [3], [4]], +// [[5], [6], [7], [8]]], +// [[[9], [10], [11], [12]], +// [[13], [14], 
[15], [16]]]] +// ``` +func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "StatelessRandomUniformInt", + Type: "BatchToSpaceND", Input: []tf.Input{ - shape, seed, minval, maxval, + input, block_shape, crops, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Inverse fast Fourier transform. +// UnpackAttr is an optional argument to Unpack. +type UnpackAttr func(optionalAttr) + +// UnpackAxis sets the optional axis attribute to value. // -// Computes the inverse 1-dimensional discrete Fourier transform over the -// inner-most dimension of `input`. +// value: Dimension along which to unpack. Negative values wrap around, so the +// valid range is `[-R, R)`. +// If not specified, defaults to 0 +func UnpackAxis(value int64) UnpackAttr { + return func(m optionalAttr) { + m["axis"] = value + } +} + +// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors. +// +// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension. +// For example, given a tensor of shape `(A, B, C, D)`; +// +// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]` +// and each tensor in `output` will have shape `(B, C, D)`. (Note that the +// dimension unpacked along is gone, unlike `split`). +// +// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]` +// and each tensor in `output` will have shape `(A, C, D)`. +// Etc. +// +// This is the opposite of `pack`. // // Arguments: -// input: A complex tensor. +// value: 1-D or higher, with `axis` dimension size equal to `num`. // -// Returns A complex tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its inverse 1D Fourier transform. 
// -// @compatibility(numpy) -// Equivalent to np.fft.ifft -// @end_compatibility -func IFFT(scope *Scope, input tf.Output) (output tf.Output) { +// Returns The list of tensors unpacked from `value`. +func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num": num} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "IFFT", + Type: "Unpack", Input: []tf.Input{ - input, + value, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("Unpack", err) + return + } + return output } -// 2D fast Fourier transform. -// -// Computes the 2-dimensional discrete Fourier transform over the inner-most -// 2 dimensions of `input`. +// Delete the stack from its resource container. // // Arguments: -// input: A complex tensor. -// -// Returns A complex tensor of the same shape as `input`. The inner-most 2 -// dimensions of `input` are replaced with their 2D Fourier transform. +// handle: The handle to a stack. // -// @compatibility(numpy) -// Equivalent to np.fft.fft2 -// @end_compatibility -func FFT2D(scope *Scope, input tf.Output) (output tf.Output) { +// Returns the created operation. +func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "FFT2D", + Type: "StackCloseV2", Input: []tf.Input{ - input, + handle, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Inverse 2D fast Fourier transform. -// -// Computes the inverse 2-dimensional discrete Fourier transform over the -// inner-most 2 dimensions of `input`. +// Increments variable pointed to by 'resource' until it reaches 'limit'. // // Arguments: -// input: A complex tensor. 
+// resource: Should be from a scalar `Variable` node. +// limit: If incrementing ref would bring it above limit, instead generates an +// 'OutOfRange' error. // -// Returns A complex tensor of the same shape as `input`. The inner-most 2 -// dimensions of `input` are replaced with their inverse 2D Fourier transform. // -// @compatibility(numpy) -// Equivalent to np.fft.ifft2 -// @end_compatibility -func IFFT2D(scope *Scope, input tf.Output) (output tf.Output) { +// Returns A copy of the input before increment. If nothing else modifies the +// input, the values produced will all be distinct. +func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"limit": limit, "T": T} opspec := tf.OpSpec{ - Type: "IFFT2D", + Type: "ResourceCountUpTo", Input: []tf.Input{ - input, + resource, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Inverse 2D real-valued fast Fourier transform. -// -// Computes the inverse 2-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most 2 dimensions of `input`. -// -// The inner-most 2 dimensions of `input` are assumed to be the result of `RFFT2D`: -// The inner-most dimension contains the `fft_length / 2 + 1` unique components of -// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed -// from the size of the inner-most 2 dimensions of `input`. If the FFT length used -// to compute `input` is odd, it should be provided since it cannot be inferred -// properly. -// -// Along each axis `IRFFT2D` is computed on, if `fft_length` (or -// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. +// Computes softsign gradients for a softsign operation. // // Arguments: -// input: A complex64 tensor. 
-// fft_length: An int32 tensor of shape [2]. The FFT length for each dimension. -// -// Returns A float32 tensor of the same rank as `input`. The inner-most 2 -// dimensions of `input` are replaced with the `fft_length` samples of their -// inverse 2D Fourier transform. +// gradients: The backpropagated gradients to the corresponding softsign operation. +// features: The features passed as input to the corresponding softsign operation. // -// @compatibility(numpy) -// Equivalent to np.fft.irfft2 -// @end_compatibility -func IRFFT2D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// Returns The gradients: `gradients / (1 + abs(features)) ** 2`. +func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IRFFT2D", + Type: "SoftsignGrad", Input: []tf.Input{ - input, fft_length, + gradients, features, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// DecodeJpegAttr is an optional argument to DecodeJpeg. -type DecodeJpegAttr func(optionalAttr) - -// DecodeJpegChannels sets the optional channels attribute to value. +// Provides the time since epoch in seconds. // -// value: Number of color channels for the decoded image. -// If not specified, defaults to 0 -func DecodeJpegChannels(value int64) DecodeJpegAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodeJpegRatio sets the optional ratio attribute to value. +// Returns the timestamp as a `float64` for seconds since the Unix epoch. // -// value: Downscaling ratio. -// If not specified, defaults to 1 -func DecodeJpegRatio(value int64) DecodeJpegAttr { - return func(m optionalAttr) { - m["ratio"] = value +// Note: the timestamp is computed when the op is executed, not when it is added +// to the graph. 
+func Timestamp(scope *Scope) (ts tf.Output) { + if scope.Err() != nil { + return } -} - -// DecodeJpegFancyUpscaling sets the optional fancy_upscaling attribute to value. -// -// value: If true use a slower but nicer upscaling of the -// chroma planes (yuv420/422 only). -// If not specified, defaults to true -func DecodeJpegFancyUpscaling(value bool) DecodeJpegAttr { - return func(m optionalAttr) { - m["fancy_upscaling"] = value + opspec := tf.OpSpec{ + Type: "Timestamp", } + op := scope.AddOperation(opspec) + return op.Output(0) } -// DecodeJpegTryRecoverTruncated sets the optional try_recover_truncated attribute to value. +// Returns immutable tensor from memory region. // -// value: If true try to recover an image from truncated input. -// If not specified, defaults to false -func DecodeJpegTryRecoverTruncated(value bool) DecodeJpegAttr { - return func(m optionalAttr) { - m["try_recover_truncated"] = value +// The current implementation memmaps the tensor from a file. +// +// Arguments: +// dtype: Type of the returned tensor. +// shape: Shape of the returned tensor. +// memory_region_name: Name of readonly memory region used by the tensor, see +// NewReadOnlyMemoryRegionFromFile in tensorflow::Env. +func ImmutableConst(scope *Scope, dtype tf.DataType, shape tf.Shape, memory_region_name string) (tensor tf.Output) { + if scope.Err() != nil { + return } -} + attrs := map[string]interface{}{"dtype": dtype, "shape": shape, "memory_region_name": memory_region_name} + opspec := tf.OpSpec{ + Type: "ImmutableConst", -// DecodeJpegAcceptableFraction sets the optional acceptable_fraction attribute to value. -// -// value: The minimum required fraction of lines before a truncated -// input is accepted. 
-// If not specified, defaults to 1 -func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr { - return func(m optionalAttr) { - m["acceptable_fraction"] = value + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// DecodeJpegDctMethod sets the optional dct_method attribute to value. +// StringJoinAttr is an optional argument to StringJoin. +type StringJoinAttr func(optionalAttr) + +// StringJoinSeparator sets the optional separator attribute to value. // -// value: string specifying a hint about the algorithm used for -// decompression. Defaults to "" which maps to a system-specific -// default. Currently valid values are ["INTEGER_FAST", -// "INTEGER_ACCURATE"]. The hint may be ignored (e.g., the internal -// jpeg library changes to a version that does not have that specific -// option.) +// value: string, an optional join separator. // If not specified, defaults to "" -func DecodeJpegDctMethod(value string) DecodeJpegAttr { +func StringJoinSeparator(value string) StringJoinAttr { return func(m optionalAttr) { - m["dct_method"] = value + m["separator"] = value } } -// Decode a JPEG-encoded image to a uint8 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the JPEG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// -// If needed, the JPEG-encoded image is transformed to match the requested number -// of color channels. -// -// The attr `ratio` allows downscaling the image by an integer factor during -// decoding. Allowed values are: 1, 2, 4, and 8. This is much faster than -// downscaling the image later. -// +// Joins the strings in the given list of string tensors into one tensor; // -// This op also supports decoding PNGs and non-animated GIFs since the interface is -// the same, though it is cleaner to use `tf.image.decode_image`. 
+// with the given separator (default is an empty separator). // // Arguments: -// contents: 0-D. The JPEG-encoded image. -// -// Returns 3-D with shape `[height, width, channels]`.. -func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) { +// inputs: A list of string tensors. The tensors must all have the same shape, +// or be scalars. Scalars may be mixed in; these will be broadcast to the shape +// of non-scalar inputs. +func StringJoin(scope *Scope, inputs []tf.Output, optional ...StringJoinAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -22100,9 +21496,9 @@ func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (i a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeJpeg", + Type: "StringJoin", Input: []tf.Input{ - contents, + tf.OutputList(inputs), }, Attrs: attrs, } @@ -22110,248 +21506,335 @@ func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (i return op.Output(0) } -// Inverse 3D real-valued fast Fourier transform. -// -// Computes the inverse 3-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most 3 dimensions of `input`. -// -// The inner-most 3 dimensions of `input` are assumed to be the result of `RFFT3D`: -// The inner-most dimension contains the `fft_length / 2 + 1` unique components of -// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed -// from the size of the inner-most 3 dimensions of `input`. If the FFT length used -// to compute `input` is odd, it should be provided since it cannot be inferred -// properly. -// -// Along each axis `IRFFT3D` is computed on, if `fft_length` (or -// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the -// corresponding dimension of `input`, the dimension is cropped. If it is larger, -// the dimension is padded with zeros. -// -// Arguments: -// input: A complex64 tensor. -// fft_length: An int32 tensor of shape [3]. 
The FFT length for each dimension. +// Creates and returns an empty tensor list. // -// Returns A float32 tensor of the same rank as `input`. The inner-most 3 -// dimensions of `input` are replaced with the `fft_length` samples of their -// inverse 3D real Fourier transform. +// All list elements must be tensors of dtype element_dtype and shape compatible +// with element_shape. // -// @compatibility(numpy) -// Equivalent to np.irfftn with 3 dimensions. -// @end_compatibility -func IRFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// handle: an empty tensor list. +// element_dtype: the type of elements in the list. +// element_shape: a shape compatible with that of elements in the list. +func EmptyTensorList(scope *Scope, element_shape tf.Output, max_num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"element_dtype": element_dtype} opspec := tf.OpSpec{ - Type: "IRFFT3D", + Type: "EmptyTensorList", Input: []tf.Input{ - input, fft_length, + element_shape, max_num_elements, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the truth value of (x != y) element-wise. +// Returns a list of tensors with the same shapes and contents as the input // -// *NOTE*: `NotEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func NotEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// tensors. +// +// This op can be used to override the gradient for complicated functions. For +// example, suppose y = f(x) and we wish to apply a custom function g for backprop +// such that dx = g(dy). 
In Python, +// +// ```python +// with tf.get_default_graph().gradient_override_map( +// {'IdentityN': 'OverrideGradientWithG'}): +// y, _ = identity_n([f(x), x]) +// +// @tf.RegisterGradient('OverrideGradientWithG') +// def ApplyG(op, dy, _): +// return [None, g(dy)] # Do not backprop to f(x). +// ``` +func IdentityN(scope *Scope, input []tf.Output) (output []tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "NotEqual", + Type: "IdentityN", Input: []tf.Input{ - x, y, + tf.OutputList(input), }, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Produces the max pool of the input tensor for quantized types. -// -// Arguments: -// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. -// min_input: The float value that the lowest quantized input value represents. -// max_input: The float value that the highest quantized input value represents. -// ksize: The size of the window for each dimension of the input tensor. -// The length must be 4 to match the number of dimensions of the input. -// strides: The stride of the sliding window for each dimension of the input -// tensor. The length must be 4 to match the number of dimensions of the input. -// padding: The type of padding algorithm to use. -// -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. 
-func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "QuantizedMaxPool", - Input: []tf.Input{ - input, min_input, max_input, - }, - Attrs: attrs, + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("IdentityN", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return output } -// Computes softplus: `log(exp(features) + 1)`. -func Softplus(scope *Scope, features tf.Output) (activations tf.Output) { +// ResourceApplyCenteredRMSPropAttr is an optional argument to ResourceApplyCenteredRMSProp. +type ResourceApplyCenteredRMSPropAttr func(optionalAttr) + +// ResourceApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var, mg, ms, and mom tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyCenteredRMSPropUseLocking(value bool) ResourceApplyCenteredRMSPropAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the centered RMSProp algorithm. +// +// The centered RMSProp algorithm uses an estimate of the centered second moment +// (i.e., the variance) for normalization, as opposed to regular RMSProp, which +// uses the (uncentered) second moment. This often helps with training, but is +// slightly more expensive in terms of computation and memory. 
+// +// Note that in dense implementation of this algorithm, mg, ms, and mom will +// update even if the grad is zero, but in this sparse implementation, mg, ms, +// and mom will not update in iterations during which the grad is zero. +// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// mean_grad = decay * mean_grad + (1-decay) * gradient +// +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// +// mg <- rho * mg_{t-1} + (1-rho) * grad +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon) +// var <- var - mom +// +// Arguments: +// var_: Should be from a Variable(). +// mg: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// rho: Decay rate. Must be a scalar. +// +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. +func ResourceApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyCenteredRMSPropAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Softplus", + Type: "ResourceApplyCenteredRMSProp", Input: []tf.Input{ - features, + var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes exponential of x - 1 element-wise. +// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp. +type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr) + +// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. 
// -// I.e., \\(y = (\exp x) - 1\\). -func Expm1(scope *Scope, x tf.Output) (y tf.Output) { +// value: If `True`, updating of the var, mg, ms, and mom tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the centered RMSProp algorithm. +// +// The centered RMSProp algorithm uses an estimate of the centered second moment +// (i.e., the variance) for normalization, as opposed to regular RMSProp, which +// uses the (uncentered) second moment. This often helps with training, but is +// slightly more expensive in terms of computation and memory. +// +// Note that in dense implementation of this algorithm, mg, ms, and mom will +// update even if the grad is zero, but in this sparse implementation, mg, ms, +// and mom will not update in iterations during which the grad is zero. +// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// mean_grad = decay * mean_grad + (1-decay) * gradient +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom +// +// Arguments: +// var_: Should be from a Variable(). +// mg: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// rho: Decay rate. Must be a scalar. +// +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var, ms and mom. +// +// Returns the created operation. 
+func ResourceSparseApplyCenteredRMSProp(scope *Scope, var_ tf.Output, mg tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyCenteredRMSPropAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Expm1", + Type: "ResourceSparseApplyCenteredRMSProp", Input: []tf.Input{ - x, + var_, mg, ms, mom, lr, rho, momentum, epsilon, grad, indices, }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Returns the number of records this Reader has produced. -// -// This is the same as the number of ReaderRead executions that have -// succeeded. +// Creates a dataset that batches `batch_size` elements from `input_dataset`. // // Arguments: -// reader_handle: Handle to a Reader. -func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { +// +// batch_size: A scalar representing the number of elements to accumulate in a +// batch. +// +// +func BatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ReaderNumRecordsProducedV2", + Type: "BatchDataset", Input: []tf.Input{ - reader_handle, + input_dataset, batch_size, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// TensorListConcatAttr is an optional argument to TensorListConcat. -type TensorListConcatAttr func(optionalAttr) +// LoadTPUEmbeddingAdadeltaParametersAttr is an optional argument to LoadTPUEmbeddingAdadeltaParameters. 
+type LoadTPUEmbeddingAdadeltaParametersAttr func(optionalAttr) -// TensorListConcatElementShape sets the optional element_shape attribute to value. -// If not specified, defaults to -func TensorListConcatElementShape(value tf.Shape) TensorListConcatAttr { +// LoadTPUEmbeddingAdadeltaParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func LoadTPUEmbeddingAdadeltaParametersTableId(value int64) LoadTPUEmbeddingAdadeltaParametersAttr { return func(m optionalAttr) { - m["element_shape"] = value + m["table_id"] = value } } -// Concats all tensors in the list along the 0th dimension. +// LoadTPUEmbeddingAdadeltaParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingAdadeltaParametersTableName(value string) LoadTPUEmbeddingAdadeltaParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load Adadelta embedding parameters. // -// Requires that all tensors have the same shape except the first dimension. +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // -// input_handle: The input list. -// tensor: The concated result. -// lengths: Output tensor containing sizes of the 0th dimension of tensors in the list, used for computing the gradient. +// Arguments: +// parameters: Value of parameters used in the Adadelta optimization algorithm. +// accumulators: Value of accumulators used in the Adadelta optimization algorithm. +// updates: Value of updates used in the Adadelta optimization algorithm. 
// -func TensorListConcat(scope *Scope, input_handle tf.Output, element_dtype tf.DataType, optional ...TensorListConcatAttr) (tensor tf.Output, lengths tf.Output) { +// +// +// Returns the created operation. +func LoadTPUEmbeddingAdadeltaParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, updates tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdadeltaParametersAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"element_dtype": element_dtype} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorListConcat", + Type: "LoadTPUEmbeddingAdadeltaParameters", Input: []tf.Input{ - input_handle, + parameters, accumulators, updates, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return scope.AddOperation(opspec) } -// Returns the set of files matching one or more glob patterns. +// Converts each string in the input Tensor to its hash mod by a number of buckets. // -// Note that this routine only supports wildcard characters in the -// basename portion of the pattern, not in the directory portion. -// Note also that the order of filenames returned can be non-deterministic. +// The hash function is deterministic on the content of the string within the +// process and will never change. However, it is not suitable for cryptography. +// This function may be used when CPU time is scarce and inputs are trusted or +// unimportant. There is a risk of adversaries constructing inputs that all hash +// to the same bucket. To prevent this problem, use a strong hash function with +// `tf.string_to_hash_bucket_strong`. // // Arguments: -// pattern: Shell wildcard pattern(s). Scalar or vector of type string. +// input: The strings to assign a hash bucket. +// num_buckets: The number of buckets. // -// Returns A vector of matching filenames. 
-func MatchingFiles(scope *Scope, pattern tf.Output) (filenames tf.Output) { +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_buckets": num_buckets} opspec := tf.OpSpec{ - Type: "MatchingFiles", + Type: "StringToHashBucketFast", Input: []tf.Input{ - pattern, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// HistogramFixedWidthAttr is an optional argument to HistogramFixedWidth. -type HistogramFixedWidthAttr func(optionalAttr) +// RealAttr is an optional argument to Real. +type RealAttr func(optionalAttr) -// HistogramFixedWidthDtype sets the optional dtype attribute to value. -// If not specified, defaults to DT_INT32 -func HistogramFixedWidthDtype(value tf.DataType) HistogramFixedWidthAttr { +// RealTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func RealTout(value tf.DataType) RealAttr { return func(m optionalAttr) { - m["dtype"] = value + m["Tout"] = value } } -// Return histogram of values. +// Returns the real part of a complex number. // -// Given the tensor `values`, this operation returns a rank 1 histogram counting -// the number of entries in `values` that fall into every bin. The bins are -// equal width and determined by the arguments `value_range` and `nbins`. +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the real part of each element in `input`. All elements in +// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real +// part returned by this operation and *b* is the imaginary part. 
// -// ```python -// # Bins will be: (-inf, 1), [1, 2), [2, 3), [3, 4), [4, inf) -// nbins = 5 -// value_range = [0.0, 5.0] -// new_values = [-1.0, 0.0, 1.5, 2.0, 5.0, 15] +// For example: // -// with tf.get_default_session() as sess: -// hist = tf.histogram_fixed_width(new_values, value_range, nbins=5) -// variables.global_variables_initializer().run() -// sess.run(hist) => [2, 1, 1, 0, 2] // ``` -// -// Arguments: -// values: Numeric `Tensor`. -// value_range: Shape [2] `Tensor` of same `dtype` as `values`. -// values <= value_range[0] will be mapped to hist[0], -// values >= value_range[1] will be mapped to hist[-1]. -// nbins: Scalar `int32 Tensor`. Number of histogram bins. -// -// Returns A 1-D `Tensor` holding histogram of values. -func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, nbins tf.Output, optional ...HistogramFixedWidthAttr) (out tf.Output) { +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.real(input) ==> [-2.25, 3.25] +// ``` +func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -22360,9 +21843,9 @@ func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "HistogramFixedWidth", + Type: "Real", Input: []tf.Input{ - values, value_range, nbins, + input, }, Attrs: attrs, } @@ -22370,64 +21853,55 @@ func HistogramFixedWidth(scope *Scope, values tf.Output, value_range tf.Output, return op.Output(0) } -// Conv3DAttr is an optional argument to Conv3D. -type Conv3DAttr func(optionalAttr) +// AudioSummaryAttr is an optional argument to AudioSummary. +type AudioSummaryAttr func(optionalAttr) -// Conv3DDataFormat sets the optional data_format attribute to value. +// AudioSummaryMaxOutputs sets the optional max_outputs attribute to value. // -// value: The data format of the input and output data. 
With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func Conv3DDataFormat(value string) Conv3DAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Conv3DDilations sets the optional dilations attribute to value. +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 // -// value: 1-D tensor of length 5. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each -// filter element on that dimension. The dimension order is determined by the -// value of `data_format`, see above for details. Dilations in the batch and -// depth dimensions must be 1. -// If not specified, defaults to -func Conv3DDilations(value []int64) Conv3DAttr { +// REQUIRES: value >= 1 +func AudioSummaryMaxOutputs(value int64) AudioSummaryAttr { return func(m optionalAttr) { - m["dilations"] = value + m["max_outputs"] = value } } -// Computes a 3-D convolution given 5-D `input` and `filter` tensors. +// Outputs a `Summary` protocol buffer with audio. // -// In signal processing, cross-correlation is a measure of similarity of -// two waveforms as a function of a time-lag applied to one of them. This -// is also known as a sliding dot product or sliding inner-product. +// DEPRECATED at GraphDef version 15: Use AudioSummaryV2. // -// Our Conv3D implements a form of cross-correlation. +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. 
+// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. // // Arguments: -// input: Shape `[batch, in_depth, in_height, in_width, in_channels]`. -// filter: Shape `[filter_depth, filter_height, filter_width, in_channels, -// out_channels]`. `in_channels` must match between `input` and `filter`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. -func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) { +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func AudioSummary(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate float32, optional ...AudioSummaryAttr) (summary tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{"sample_rate": sample_rate} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Conv3D", + Type: "AudioSummary", Input: []tf.Input{ - input, filter, + tag, tensor, }, Attrs: attrs, } @@ -22435,324 +21909,354 @@ func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, pa return op.Output(0) } -// QuantizeV2Attr is an optional argument to QuantizeV2. -type QuantizeV2Attr func(optionalAttr) - -// QuantizeV2Mode sets the optional mode attribute to value. 
-// If not specified, defaults to "MIN_COMBINED" -func QuantizeV2Mode(value string) QuantizeV2Attr { - return func(m optionalAttr) { - m["mode"] = value - } -} +// QrAttr is an optional argument to Qr. +type QrAttr func(optionalAttr) -// QuantizeV2RoundMode sets the optional round_mode attribute to value. -// If not specified, defaults to "HALF_AWAY_FROM_ZERO" -func QuantizeV2RoundMode(value string) QuantizeV2Attr { +// QrFullMatrices sets the optional full_matrices attribute to value. +// +// value: If true, compute full-sized `q` and `r`. If false +// (the default), compute only the leading `P` columns of `q`. +// If not specified, defaults to false +func QrFullMatrices(value bool) QrAttr { return func(m optionalAttr) { - m["round_mode"] = value + m["full_matrices"] = value } } -// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. -// -// [min_range, max_range] are scalar floats that specify the range for -// the 'input' data. The 'mode' attribute controls exactly which calculations are -// used to convert the float values to their quantized equivalents. The -// 'round_mode' attribute controls which rounding tie-breaking algorithm is used -// when rounding float values to their quantized equivalents. -// -// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: -// -// ``` -// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) -// if T == qint8: out[i] -= (range(T) + 1) / 2.0 -// ``` -// -// here `range(T) = numeric_limits::max() - numeric_limits::min()` -// -// *MIN_COMBINED Mode Example* -// -// Assume the input is type float and has a possible range of [0.0, 6.0] and the -// output type is quint8 ([0, 255]). The min_range and max_range values should be -// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each -// value of the input by 255/6 and cast to quint8. 
-// -// If the output type was qint8 ([-128, 127]), the operation will additionally -// subtract each value by 128 prior to casting, so that the range of values aligns -// with the range of qint8. -// -// If the mode is 'MIN_FIRST', then this approach is used: -// -// ``` -// num_discrete_values = 1 << (# of bits in T) -// range_adjust = num_discrete_values / (num_discrete_values - 1) -// range = (range_max - range_min) * range_adjust -// range_scale = num_discrete_values / range -// quantized = round(input * range_scale) - round(range_min * range_scale) + -// numeric_limits::min() -// quantized = max(quantized, numeric_limits::min()) -// quantized = min(quantized, numeric_limits::max()) -// ``` -// -// The biggest difference between this and MIN_COMBINED is that the minimum range -// is rounded first, before it's subtracted from the rounded value. With -// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing -// and dequantizing will introduce a larger and larger error. -// -// *SCALED mode Example* -// -// `SCALED` mode matches the quantization approach used in -// `QuantizeAndDequantize{V2|V3}`. -// -// If the mode is `SCALED`, we do not use the full range of the output type, -// choosing to elide the lowest possible value for symmetry (e.g., output range is -// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to -// 0. -// -// We first find the range of values in our tensor. The -// range we use is always centered on 0, so we find m such that -// -// ```c++ -// m = max(abs(input_min), abs(input_max)) -// ``` -// -// Our input tensor range is then `[-m, m]`. -// -// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. 
-// If T is signed, this is -// -// ``` -// num_bits = sizeof(T) * 8 -// [min_fixed, max_fixed] = -// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] -// ``` -// -// Otherwise, if T is unsigned, the fixed-point range is -// -// ``` -// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] -// ``` -// -// From this we compute our scaling factor, s: -// -// ```c++ -// s = (max_fixed - min_fixed) / (2 * m) -// ``` +// Computes the QR decompositions of one or more matrices. // -// Now we can quantize the elements of our tensor: +// Computes the QR decomposition of each inner matrix in `tensor` such that +// `tensor[..., :, :] = q[..., :, :] * r[..., :,:])` // -// ```c++ -// result = round(input * s) +// ```python +// # a is a tensor. +// # q is a tensor of orthonormal matrices. +// # r is a tensor of upper triangular matrices. +// q, r = qr(a) +// q_full, r_full = qr(a, full_matrices=True) // ``` // -// One thing to watch out for is that the operator may choose to adjust the -// requested minimum and maximum values slightly during the quantization process, -// so you should always use the output ports as the range for further calculations. -// For example, if the requested minimum and maximum values are close to equal, -// they will be separated by a small epsilon value to prevent ill-formed quantized -// buffers from being created. Otherwise, you can end up with buffers where all the -// quantized values map to the same float value, which causes problems for -// operations that have to perform further calculations on them. -// // Arguments: +// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. // -// min_range: The minimum scalar value possibly produced for the input. -// max_range: The maximum scalar value possibly produced for the input. 
-// -// -// Returns The quantized data produced from the float input.The actual minimum scalar value used for the output.The actual maximum scalar value used for the output. -func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// Returns Orthonormal basis for range of `a`. If `full_matrices` is `False` then +// shape is `[..., M, P]`; if `full_matrices` is `True` then shape is +// `[..., M, M]`.Triangular factor. If `full_matrices` is `False` then shape is +// `[..., P, N]`. If `full_matrices` is `True` then shape is `[..., M, N]`. +func Qr(scope *Scope, input tf.Output, optional ...QrAttr) (q tf.Output, r tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"T": T} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizeV2", + Type: "Qr", Input: []tf.Input{ - input, min_range, max_range, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0), op.Output(1) } -// Creates a dataset that batches `batch_size` elements from `input_dataset`. +// TensorArrayV3Attr is an optional argument to TensorArrayV3. +type TensorArrayV3Attr func(optionalAttr) + +// TensorArrayV3ElementShape sets the optional element_shape attribute to value. // -// Arguments: +// value: The expected shape of an element, if known. Used to +// validate the shapes of TensorArray elements. If this shape is not +// fully specified, gathering zero-size TensorArrays is an error. +// If not specified, defaults to +func TensorArrayV3ElementShape(value tf.Shape) TensorArrayV3Attr { + return func(m optionalAttr) { + m["element_shape"] = value + } +} + +// TensorArrayV3DynamicSize sets the optional dynamic_size attribute to value. 
// -// batch_size: A scalar representing the number of elements to accumulate in a batch. -// drop_remainder: A scalar representing whether the last batch should be dropped in case its size -// is smaller than desired. +// value: A boolean that determines whether writes to the TensorArray +// are allowed to grow the size. By default, this is not allowed. +// If not specified, defaults to false +func TensorArrayV3DynamicSize(value bool) TensorArrayV3Attr { + return func(m optionalAttr) { + m["dynamic_size"] = value + } +} + +// TensorArrayV3ClearAfterRead sets the optional clear_after_read attribute to value. // +// value: If true (default), Tensors in the TensorArray are cleared +// after being read. This disables multiple read semantics but allows early +// release of memory. +// If not specified, defaults to true +func TensorArrayV3ClearAfterRead(value bool) TensorArrayV3Attr { + return func(m optionalAttr) { + m["clear_after_read"] = value + } +} + +// TensorArrayV3IdenticalElementShapes sets the optional identical_element_shapes attribute to value. // -func BatchDatasetV2(scope *Scope, input_dataset tf.Output, batch_size tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// value: If true (default is false), then all +// elements in the TensorArray will be expected to have have identical shapes. +// This allows certain behaviors, like dynamically checking for +// consistent shapes on write, and being able to fill in properly +// shaped zero tensors on stack -- even if the element_shape attribute +// is not fully defined. +// If not specified, defaults to false +func TensorArrayV3IdenticalElementShapes(value bool) TensorArrayV3Attr { + return func(m optionalAttr) { + m["identical_element_shapes"] = value + } +} + +// TensorArrayV3TensorArrayName sets the optional tensor_array_name attribute to value. +// +// value: Overrides the name used for the temporary tensor_array +// resource. 
Default value is the name of the 'TensorArray' op (which +// is guaranteed unique). +// If not specified, defaults to "" +func TensorArrayV3TensorArrayName(value string) TensorArrayV3Attr { + return func(m optionalAttr) { + m["tensor_array_name"] = value + } +} + +// An array of Tensors of given size. +// +// Write data via Write and read via Read or Pack. +// +// Arguments: +// size: The size of the array. +// dtype: The type of the elements on the tensor_array. +// +// Returns The handle to the TensorArray.A scalar used to control gradient flow. +func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ...TensorArrayV3Attr) (handle tf.Output, flow tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"dtype": dtype} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BatchDatasetV2", + Type: "TensorArrayV3", Input: []tf.Input{ - input_dataset, batch_size, drop_remainder, + size, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Returns the truth value of NOT x element-wise. +func LogicalNot(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LogicalNot", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) return op.Output(0) } -// Saves the input tensors to disk. +// 3D real-valued fast Fourier transform. // -// The size of `tensor_names` must match the number of tensors in `data`. `data[i]` -// is written to `filename` with name `tensor_names[i]`. +// Computes the 3-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most 3 dimensions of `input`. // -// See also `SaveSlices`. 
+// Since the DFT of a real signal is Hermitian-symmetric, `RFFT3D` only returns the +// `fft_length / 2 + 1` unique components of the FFT for the inner-most dimension +// of `output`: the zero-frequency term, followed by the `fft_length / 2` +// positive-frequency terms. +// +// Along each axis `RFFT3D` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. // // Arguments: -// filename: Must have a single element. The name of the file to which we write -// the tensor. -// tensor_names: Shape `[N]`. The names of the tensors to be saved. -// data: `N` tensors to save. +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. // -// Returns the created operation. -func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Output) (o *tf.Operation) { +// Returns A complex64 tensor of the same rank as `input`. The inner-most 3 +// dimensions of `input` are replaced with the their 3D Fourier transform. The +// inner-most dimension contains `fft_length / 2 + 1` unique frequency +// components. +// +// @compatibility(numpy) +// Equivalent to np.fft.rfftn with 3 dimensions. +// @end_compatibility +func RFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Save", + Type: "RFFT3D", Input: []tf.Input{ - filename, tensor_names, tf.OutputList(data), + input, fft_length, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Returns element-wise remainder of division. When `x < 0` xor `y < 0` is -// -// true, this follows Python semantics in that the result here is consistent -// with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`. -// -// *NOTE*: `FloorMod` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func FloorMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Computes rectified linear: `max(features, 0)`. +func Relu(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "FloorMod", + Type: "Relu", Input: []tf.Input{ - x, y, + features, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the gradient of morphological 2-D dilation with respect to the filter. +// ResourceApplyAddSignAttr is an optional argument to ResourceApplyAddSign. +type ResourceApplyAddSignAttr func(optionalAttr) + +// ResourceApplyAddSignUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and m tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAddSignUseLocking(value bool) ResourceApplyAddSignAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the AddSign update. +// +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// update <- (alpha + sign_decay * sign(g) *sign(m)) * g +// variable <- variable - lr_t * update // // Arguments: -// input: 4-D with shape `[batch, in_height, in_width, depth]`. -// filter: 3-D with shape `[filter_height, filter_width, depth]`. -// out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`. -// strides: 1-D of length 4. The stride of the sliding window for each dimension of -// the input tensor. Must be: `[1, stride_height, stride_width, 1]`. -// rates: 1-D of length 4. The input stride for atrous morphological dilation. -// Must be: `[1, rate_height, rate_width, 1]`. -// padding: The type of padding algorithm to use. +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. 
+// alpha: Must be a scalar. +// sign_decay: Must be a scalar. +// beta: Must be a scalar. +// grad: The gradient. // -// Returns 3-D with shape `[filter_height, filter_width, depth]`. -func Dilation2DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (filter_backprop tf.Output) { +// Returns the created operation. +func ResourceApplyAddSign(scope *Scope, var_ tf.Output, m tf.Output, lr tf.Output, alpha tf.Output, sign_decay tf.Output, beta tf.Output, grad tf.Output, optional ...ResourceApplyAddSignAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Dilation2DBackpropFilter", + Type: "ResourceApplyAddSign", Input: []tf.Input{ - input, filter, out_backprop, + var_, m, lr, alpha, sign_decay, beta, grad, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Returns a list list which has the passed-in `Tensor` as last element and the other elements of the given list in `input_handle`. +// Divides sparse updates into the variable referenced by `resource`. // -// tensor: The tensor to put on the list. -// input_handle: The old list. -// output_handle: A list with the elements of the old list followed by tensor. -// element_dtype: the type of elements in the list. -// element_shape: a shape compatible with that of elements in the list. -func TensorListPushBack(scope *Scope, input_handle tf.Output, tensor tf.Output) (output_handle tf.Output) { +// This operation computes +// +// # Scalar indices +// ref[indices, ...] /= updates[...] +// +// # Vector indices (for each i) +// ref[indices[i], ...] /= updates[i, ...] +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] /= updates[i, ..., j, ...] 
+// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions multiply. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
+// +// Arguments: +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. +// +// Returns the created operation. +func ResourceScatterDiv(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorListPushBack", + Type: "ResourceScatterDiv", Input: []tf.Input{ - input_handle, tensor, + resource, indices, updates, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap. -type AddSparseToTensorsMapAttr func(optionalAttr) +// ListDiffAttr is an optional argument to ListDiff. +type ListDiffAttr func(optionalAttr) -// AddSparseToTensorsMapContainer sets the optional container attribute to value. -// -// value: The container name for the `SparseTensorsMap` created by this op. -// If not specified, defaults to "" -func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr { +// ListDiffOutIdx sets the optional out_idx attribute to value. +// If not specified, defaults to DT_INT32 +func ListDiffOutIdx(value tf.DataType) ListDiffAttr { return func(m optionalAttr) { - m["container"] = value + m["out_idx"] = value } } -// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value. +// Computes the difference between two lists of numbers or strings. // -// value: The shared name for the `SparseTensorsMap` created by this op. -// If blank, the new Operation's unique name is used. -// If not specified, defaults to "" -func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Add a `SparseTensor` to a `SparseTensorsMap` return its handle. 
+// Given a list `x` and a list `y`, this operation returns a list `out` that +// represents all values that are in `x` but not in `y`. The returned list `out` +// is sorted in the same order that the numbers appear in `x` (duplicates are +// preserved). This operation also returns a list `idx` that represents the +// position of each `out` element in `x`. In other words: // -// A `SparseTensor` is represented by three tensors: `sparse_indices`, -// `sparse_values`, and `sparse_shape`. +// `out[i] = x[idx[i]] for i in [0, 1, ..., len(out) - 1]` // -// This operator takes the given `SparseTensor` and adds it to a container -// object (a `SparseTensorsMap`). A unique key within this container is generated -// in the form of an `int64`, and this is the value that is returned. +// For example, given this input: // -// The `SparseTensor` can then be read out as part of a minibatch by passing -// the key as a vector element to `TakeManySparseFromTensorsMap`. To ensure -// the correct `SparseTensorsMap` is accessed, ensure that the same -// `container` and `shared_name` are passed to that Op. If no `shared_name` -// is provided here, instead use the *name* of the Operation created by calling -// `AddSparseToTensorsMap` as the `shared_name` passed to -// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. +// ``` +// x = [1, 2, 3, 4, 5, 6] +// y = [1, 3, 5] +// ``` +// +// This operation would return: +// +// ``` +// out ==> [2, 4, 6] +// idx ==> [1, 3, 5] +// ``` // // Arguments: -// sparse_indices: 2-D. The `indices` of the `SparseTensor`. -// sparse_values: 1-D. The `values` of the `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the `SparseTensor`. +// x: 1-D. Values to keep. +// y: 1-D. Values to remove. // -// Returns 0-D. The handle of the `SparseTensor` now stored in the -// `SparseTensorsMap`. 
-func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) { +// Returns 1-D. Values present in `x` but not in `y`.1-D. Positions of `x` values preserved in `out`. +func ListDiff(scope *Scope, x tf.Output, y tf.Output, optional ...ListDiffAttr) (out tf.Output, idx tf.Output) { if scope.Err() != nil { return } @@ -22761,308 +22265,230 @@ func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values a(attrs) } opspec := tf.OpSpec{ - Type: "AddSparseToTensorsMap", + Type: "ListDiff", Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, + x, y, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Deserialize and concatenate `SparseTensors` from a serialized minibatch. -// -// The input `serialized_sparse` must be a string matrix of shape `[N x 3]` where -// `N` is the minibatch size and the rows correspond to packed outputs of -// `SerializeSparse`. The ranks of the original `SparseTensor` objects -// must all match. When the final `SparseTensor` is created, it has rank one -// higher than the ranks of the incoming `SparseTensor` objects -// (they have been concatenated along a new row dimension). -// -// The output `SparseTensor` object's shape values for all dimensions but the -// first are the max across the input `SparseTensor` objects' shape values -// for the corresponding dimensions. Its first shape value is `N`, the minibatch -// size. -// -// The input `SparseTensor` objects' indices are assumed ordered in -// standard lexicographic order. If this is not the case, after this -// step run `SparseReorder` to restore index ordering. 
-// -// For example, if the serialized input is a `[2 x 3]` matrix representing two -// original `SparseTensor` objects: +// LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingAdadeltaParametersGradAccumDebug. +type LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr func(optionalAttr) + +// LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// index = [ 0] -// [10] -// [20] -// values = [1, 2, 3] -// shape = [50] +// REQUIRES: value >= -1 +func LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingAdadeltaParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load Adadelta parameters with debug support. // -// and +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // -// index = [ 2] -// [10] -// values = [4, 5] -// shape = [30] +// Arguments: +// parameters: Value of parameters used in the Adadelta optimization algorithm. +// accumulators: Value of accumulators used in the Adadelta optimization algorithm. +// updates: Value of updates used in the Adadelta optimization algorithm. +// gradient_accumulators: Value of gradient_accumulators used in the Adadelta optimization algorithm. 
// -// then the final deserialized `SparseTensor` will be: // -// index = [0 0] -// [0 10] -// [0 20] -// [1 2] -// [1 10] -// values = [1, 2, 3, 4, 5] -// shape = [2 50] // -// Arguments: -// serialized_sparse: 2-D, The `N` serialized `SparseTensor` objects. -// Must have 3 columns. -// dtype: The `dtype` of the serialized `SparseTensor` objects. -func DeserializeManySparse(scope *Scope, serialized_sparse tf.Output, dtype tf.DataType) (sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output) { +// Returns the created operation. +func LoadTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, updates tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdadeltaParametersGradAccumDebugAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "DeserializeManySparse", + Type: "LoadTPUEmbeddingAdadeltaParametersGradAccumDebug", Input: []tf.Input{ - serialized_sparse, + parameters, accumulators, updates, gradient_accumulators, }, Attrs: attrs, } + return scope.AddOperation(opspec) +} + +// Return a tensor with the same shape and contents as the input tensor or value. +func Identity(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Identity", + Input: []tf.Input{ + input, + }, + } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Elementwise computes the bitwise AND of `x` and `y`. +// Computes arctangent of `y/x` element-wise, respecting signs of the arguments. // -// The result will have those bits set, that are set in both `x` and `y`. 
The -// computation is performed on the underlying representations of `x` and `y`. -func BitwiseAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// This is the angle \( \theta \in [-\pi, \pi] \) such that +// \[ x = r \cos(\theta) \] +// and +// \[ y = r \sin(\theta) \] +// where \(r = \sqrt(x^2 + y^2) \). +func Atan2(scope *Scope, y tf.Output, x tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "BitwiseAnd", + Type: "Atan2", Input: []tf.Input{ - x, y, + y, x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Inverse real-valued fast Fourier transform. -// -// Computes the inverse 1-dimensional discrete Fourier transform of a real-valued -// signal over the inner-most dimension of `input`. -// -// The inner-most dimension of `input` is assumed to be the result of `RFFT`: the -// `fft_length / 2 + 1` unique components of the DFT of a real-valued signal. If -// `fft_length` is not provided, it is computed from the size of the inner-most -// dimension of `input` (`fft_length = 2 * (inner - 1)`). If the FFT length used to -// compute `input` is odd, it should be provided since it cannot be inferred -// properly. +// Updates specified rows with values in `v`. // -// Along the axis `IRFFT` is computed on, if `fft_length / 2 + 1` is smaller -// than the corresponding dimension of `input`, the dimension is cropped. If it is -// larger, the dimension is padded with zeros. +// Computes `x[i, :] = v; return x`. // // Arguments: -// input: A complex64 tensor. -// fft_length: An int32 tensor of shape [1]. The FFT length. -// -// Returns A float32 tensor of the same rank as `input`. The inner-most -// dimension of `input` is replaced with the `fft_length` samples of its inverse -// 1D Fourier transform. +// x: A tensor of type `T`. +// i: A vector. Indices into the left-most dimension of `x`. +// v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size. 
// -// @compatibility(numpy) -// Equivalent to np.fft.irfft -// @end_compatibility -func IRFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { +// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`. +func InplaceUpdate(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IRFFT", + Type: "InplaceUpdate", Input: []tf.Input{ - input, fft_length, + x, i, v, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Concatenates a list of `SparseTensor` along the specified dimension. -// -// Concatenation is with respect to the dense versions of these sparse tensors. -// It is assumed that each input is a `SparseTensor` whose elements are ordered -// along increasing dimension number. -// -// All inputs' shapes must match, except for the concat dimension. The -// `indices`, `values`, and `shapes` lists must have the same length. -// -// The output shape is identical to the inputs', except along the concat -// dimension, where it is the sum of the inputs' sizes along that dimension. -// -// The output elements will be resorted to preserve the sort order along -// increasing dimension number. -// -// This op runs in `O(M log M)` time, where `M` is the total number of non-empty -// values across all inputs. This is due to the need for an internal sort in -// order to concatenate efficiently across an arbitrary dimension. -// -// For example, if `concat_dim = 1` and the inputs are -// -// sp_inputs[0]: shape = [2, 3] -// [0, 2]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// sp_inputs[1]: shape = [2, 4] -// [0, 1]: "d" -// [0, 2]: "e" -// -// then the output will be -// -// shape = [2, 7] -// [0, 2]: "a" -// [0, 4]: "d" -// [0, 5]: "e" -// [1, 0]: "b" -// [1, 1]: "c" +// OutfeedDequeueTupleAttr is an optional argument to OutfeedDequeueTuple. 
+type OutfeedDequeueTupleAttr func(optionalAttr) + +// OutfeedDequeueTupleDeviceOrdinal sets the optional device_ordinal attribute to value. // -// Graphically this is equivalent to doing +// value: The TPU device to use. This should be -1 when the Op +// is running on a TPU device, and >= 0 when the Op is running on the CPU +// device. +// If not specified, defaults to -1 +func OutfeedDequeueTupleDeviceOrdinal(value int64) OutfeedDequeueTupleAttr { + return func(m optionalAttr) { + m["device_ordinal"] = value + } +} + +// Retrieve multiple values from the computation outfeed. // -// [ a] concat [ d e ] = [ a d e ] -// [b c ] [ ] [b c ] +// This operation will block indefinitely until data is available. Output `i` +// corresponds to XLA tuple element `i`. // // Arguments: -// indices: 2-D. Indices of each input `SparseTensor`. -// values: 1-D. Non-empty values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// concat_dim: Dimension to concatenate along. Must be in range [-rank, rank), -// where rank is the number of dimensions in each input `SparseTensor`. +// dtypes: The element types of each element in `outputs`. +// shapes: The shapes of each tensor in `outputs`. // -// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. -func SparseConcat(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, concat_dim int64) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// Returns A list of tensors that will be read from the outfeed. 
+func OutfeedDequeueTuple(scope *Scope, dtypes []tf.DataType, shapes []tf.Shape, optional ...OutfeedDequeueTupleAttr) (outputs []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"concat_dim": concat_dim} + attrs := map[string]interface{}{"dtypes": dtypes, "shapes": shapes} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseConcat", - Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), - }, + Type: "OutfeedDequeueTuple", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + if scope.Err() != nil { + return + } + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("OutfeedDequeueTuple", err) + return + } + return outputs } -// Generates sparse cross from a list of sparse and dense tensors. -// -// The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each -// representing features of one feature column. It outputs a 2D `SparseTensor` with -// the batchwise crosses of these features. -// -// For example, if the inputs are +// Identity op for gradient debugging. 
// -// inputs[0]: SparseTensor with shape = [2, 2] -// [0, 0]: "a" -// [1, 0]: "b" -// [1, 1]: "c" -// -// inputs[1]: SparseTensor with shape = [2, 1] -// [0, 0]: "d" -// [1, 0]: "e" -// -// inputs[2]: Tensor [["f"], ["g"]] -// -// then the output will be -// -// shape = [2, 2] -// [0, 0]: "a_X_d_X_f" -// [1, 0]: "b_X_e_X_g" -// [1, 1]: "c_X_e_X_g" -// -// if hashed_output=true then the output will be -// -// shape = [2, 2] -// [0, 0]: FingerprintCat64( -// Fingerprint64("f"), FingerprintCat64( -// Fingerprint64("d"), Fingerprint64("a"))) -// [1, 0]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("b"))) -// [1, 1]: FingerprintCat64( -// Fingerprint64("g"), FingerprintCat64( -// Fingerprint64("e"), Fingerprint64("c"))) -// -// Arguments: -// indices: 2-D. Indices of each input `SparseTensor`. -// values: 1-D. values of each `SparseTensor`. -// shapes: 1-D. Shapes of each `SparseTensor`. -// dense_inputs: 2-D. Columns represented by dense `Tensor`. -// hashed_output: If true, returns the hash of the cross instead of the string. -// This will allow us avoiding string manipulations. -// num_buckets: It is used if hashed_output is true. -// output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. -// hash_key: Specify the hash_key that will be used by the `FingerprintCat64` -// function to combine the crosses fingerprints. -// -// -// -// Returns 2-D. Indices of the concatenated `SparseTensor`.1-D. Non-empty values of the concatenated or hashed -// `SparseTensor`.1-D. Shape of the concatenated `SparseTensor`. -func SparseCross(scope *Scope, indices []tf.Output, values []tf.Output, shapes []tf.Output, dense_inputs []tf.Output, hashed_output bool, num_buckets int64, hash_key int64, out_type tf.DataType, internal_type tf.DataType) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { +// This op is hidden from public in Python. 
It is used by TensorFlow Debugger to +// register gradient tensors for gradient debugging. +// This op operates on non-reference-type tensors. +func DebugGradientIdentity(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"hashed_output": hashed_output, "num_buckets": num_buckets, "hash_key": hash_key, "out_type": out_type, "internal_type": internal_type} opspec := tf.OpSpec{ - Type: "SparseCross", + Type: "DebugGradientIdentity", Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(values), tf.OutputList(shapes), tf.OutputList(dense_inputs), + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad. -type ResourceApplyProximalAdagradAttr func(optionalAttr) +// ResourceSparseApplyAdadeltaAttr is an optional argument to ResourceSparseApplyAdadelta. +type ResourceSparseApplyAdadeltaAttr func(optionalAttr) -// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. +// ResourceSparseApplyAdadeltaUseLocking sets the optional use_locking attribute to value. // // value: If True, updating of the var and accum tensors will be protected by // a lock; otherwise the behavior is undefined, but may exhibit less contention. // If not specified, defaults to false -func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr { +func ResourceSparseApplyAdadeltaUseLocking(value bool) ResourceSparseApplyAdadeltaAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate. -// -// accum += grad * grad -// prox_v = var - lr * grad * (1 / sqrt(accum)) -// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} +// var: Should be from a Variable(). // // Arguments: -// var_: Should be from a Variable(). 
+// // accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. +// accum_update: : Should be from a Variable(). +// lr: Learning rate. Must be a scalar. +// rho: Decay factor. Must be a scalar. +// epsilon: Constant factor. Must be a scalar. // grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. // // Returns the created operation. -func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) { +func ResourceSparseApplyAdadelta(scope *Scope, var_ tf.Output, accum tf.Output, accum_update tf.Output, lr tf.Output, rho tf.Output, epsilon tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyAdadeltaAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -23071,133 +22497,304 @@ func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyProximalAdagrad", + Type: "ResourceSparseApplyAdadelta", Input: []tf.Input{ - var_, accum, lr, l1, l2, grad, + var_, accum, accum_update, lr, rho, epsilon, grad, indices, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2. -type MutableHashTableOfTensorsV2Attr func(optionalAttr) +// Returns which elements of x are NaN. +// +// @compatibility(numpy) +// Equivalent to np.isnan +// @end_compatibility +func IsNan(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IsNan", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// MutableHashTableOfTensorsV2Container sets the optional container attribute to value. 
+// DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. +type DepthwiseConv2dNativeBackpropFilterAttr func(optionalAttr) + +// DepthwiseConv2dNativeBackpropFilterDataFormat sets the optional data_format attribute to value. // -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, height, width, channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, channels, height, width]. +// If not specified, defaults to "NHWC" +func DepthwiseConv2dNativeBackpropFilterDataFormat(value string) DepthwiseConv2dNativeBackpropFilterAttr { return func(m optionalAttr) { - m["container"] = value + m["data_format"] = value } } -// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value. +// DepthwiseConv2dNativeBackpropFilterDilations sets the optional dilations attribute to value. // -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr { +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. 
+// If not specified, defaults to +func DepthwiseConv2dNativeBackpropFilterDilations(value []int64) DepthwiseConv2dNativeBackpropFilterAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["dilations"] = value } } -// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// If not specified, defaults to false -func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value +// Computes the gradients of depthwise convolution with respect to the filter. +// +// Arguments: +// input: 4-D with shape based on `data_format`. For example, if +// `data_format` is 'NHWC' then `input` is a 4-D `[batch, in_height, +// in_width, in_channels]` tensor. +// filter_sizes: An integer vector representing the tensor shape of `filter`, +// where `filter` is a 4-D +// `[filter_height, filter_width, in_channels, depthwise_multiplier]` tensor. +// out_backprop: 4-D with shape based on `data_format`. +// For example, if `data_format` is 'NHWC' then +// out_backprop shape is `[batch, out_height, out_width, out_channels]`. +// Gradients w.r.t. the output of the convolution. +// strides: The stride of the sliding window for each dimension of the input +// of the convolution. +// padding: The type of padding algorithm to use. +// +// Returns 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. Gradient w.r.t. +// the `filter` input of the convolution. 
+func DepthwiseConv2dNativeBackpropFilter(scope *Scope, input tf.Output, filter_sizes tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...DepthwiseConv2dNativeBackpropFilterAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DepthwiseConv2dNativeBackpropFilter", + Input: []tf.Input{ + input, filter_sizes, out_backprop, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value. -// If not specified, defaults to <> -func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr { +// MapUnstageAttr is an optional argument to MapUnstage. +type MapUnstageAttr func(optionalAttr) + +// MapUnstageCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func MapUnstageCapacity(value int64) MapUnstageAttr { return func(m optionalAttr) { - m["value_shape"] = value + m["capacity"] = value } } -// Creates an empty hash table. -// -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a vector. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. +// MapUnstageMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. +// REQUIRES: value >= 0 +func MapUnstageMemoryLimit(value int64) MapUnstageAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// MapUnstageContainer sets the optional container attribute to value. 
+// If not specified, defaults to "" +func MapUnstageContainer(value string) MapUnstageAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MapUnstageSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapUnstageSharedName(value string) MapUnstageAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes and returns the values associated with the key // -// Returns Handle to a table. -func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) { +// from the underlying container. If the underlying container +// does not contain this key, the op will block until it does. +func MapUnstage(scope *Scope, key tf.Output, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageAttr) (values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MutableHashTableOfTensorsV2", - + Type: "MapUnstage", + Input: []tf.Input{ + key, indices, + }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("MapUnstage", err) + return + } + return values } -// The gradient operator for the SparseSlice op. +// An op enabling differentiation of TPU Embeddings. // -// This op takes in the upstream gradient w.r.t. non-empty values of -// the sliced `SparseTensor`, and outputs the gradients w.r.t. -// the non-empty values of input `SparseTensor`. +// This op simply returns its first input, which is assumed to have been sliced +// from the Tensors returned by TPUEmbeddingDequeueActivations. 
The presence of +// this op, and its first argument being a trainable Variable, enables automatic +// differentiation of graphs containing embeddings via the TPU Embedding Python +// libraries. // // Arguments: -// backprop_val_grad: 1-D. The gradient with respect to -// the non-empty values of the sliced `SparseTensor`. -// input_indices: 2-D. The `indices` of the input `SparseTensor`. -// input_start: 1-D. tensor represents the start of the slice. -// output_indices: 2-D. The `indices` of the sliced `SparseTensor`. -// -// Returns 1-D. The gradient with respect to the non-empty values of input `SparseTensor`. -func SparseSliceGrad(scope *Scope, backprop_val_grad tf.Output, input_indices tf.Output, input_start tf.Output, output_indices tf.Output) (val_grad tf.Output) { +// embedding_variable: A trainable variable, enabling optimizers to find this op. +// sliced_activations: The embedding activations Tensor to return. +// table_id: The id of the table in the embedding layer configuration from which +// these activations were computed. +// lookup_id: Identifier of the set of embedding indices which produced these +// activations. +func TPUEmbeddingActivations(scope *Scope, embedding_variable tf.Output, sliced_activations tf.Output, table_id int64, lookup_id int64) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"table_id": table_id, "lookup_id": lookup_id} opspec := tf.OpSpec{ - Type: "SparseSliceGrad", + Type: "TPUEmbeddingActivations", Input: []tf.Input{ - backprop_val_grad, input_indices, input_start, output_indices, + embedding_variable, sliced_activations, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that changes the batch size. +// BatchToSpace for 4-D tensors of type T. // -// Creates a dataset that changes the batch size of the dataset to current batch -// size // num_workers. +// This is a legacy version of the more general BatchToSpaceND. 
+// +// Rearranges (permutes) data from batch into blocks of spatial data, followed by +// cropping. This is the reverse transformation of SpaceToBatch. More specifically, +// this op outputs a copy of the input tensor where values from the `batch` +// dimension are moved in spatial blocks to the `height` and `width` dimensions, +// followed by cropping along the `height` and `width` dimensions. // // Arguments: -// input_dataset: A variant tensor representing the input dataset. -// num_workers: A scalar representing the number of workers to distribute this batch across. As -// a result of this transformation the current batch size would end up being -// divided by this parameter. +// input: 4-D tensor with shape +// `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size, +// depth]`. Note that the batch size of the input tensor must be divisible by +// `block_size * block_size`. +// crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies +// how many elements to crop from the intermediate result across the spatial +// dimensions as follows: // +// crops = [[crop_top, crop_bottom], [crop_left, crop_right]] // -func ExperimentalRebatchDataset(scope *Scope, input_dataset tf.Output, num_workers tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// +// Returns 4-D with shape `[batch, height, width, depth]`, where: +// +// height = height_pad - crop_top - crop_bottom +// width = width_pad - crop_left - crop_right +// +// The attr `block_size` must be greater than one. It indicates the block size. 
+// +// Some examples: +// +// (1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2: +// +// ``` +// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] +// ``` +// +// The output tensor has shape `[1, 2, 2, 1]` and value: +// +// ``` +// x = [[[[1], [2]], [[3], [4]]]] +// ``` +// +// (2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2: +// +// ``` +// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] +// ``` +// +// The output tensor has shape `[1, 2, 2, 3]` and value: +// +// ``` +// x = [[[[1, 2, 3], [4, 5, 6]], +// [[7, 8, 9], [10, 11, 12]]]] +// ``` +// +// (3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2: +// +// ``` +// x = [[[[1], [3]], [[9], [11]]], +// [[[2], [4]], [[10], [12]]], +// [[[5], [7]], [[13], [15]]], +// [[[6], [8]], [[14], [16]]]] +// ``` +// +// The output tensor has shape `[1, 4, 4, 1]` and value: +// +// ``` +// x = [[[1], [2], [3], [4]], +// [[5], [6], [7], [8]], +// [[9], [10], [11], [12]], +// [[13], [14], [15], [16]]] +// ``` +// +// (4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2: +// +// ``` +// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]], +// [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]] +// ``` +// +// The output tensor has shape `[2, 2, 4, 1]` and value: +// +// ``` +// x = [[[[1], [3]], [[5], [7]]], +// [[[2], [4]], [[10], [12]]], +// [[[5], [7]], [[13], [15]]], +// [[[6], [8]], [[14], [16]]]] +// ``` +func BatchToSpace(scope *Scope, input tf.Output, crops tf.Output, block_size int64) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"block_size": block_size} opspec := tf.OpSpec{ - Type: "ExperimentalRebatchDataset", + Type: "BatchToSpace", Input: []tf.Input{ - input_dataset, num_workers, + input, crops, }, Attrs: attrs, } @@ -23205,141 +22802,139 @@ func ExperimentalRebatchDataset(scope 
*Scope, input_dataset tf.Output, num_worke return op.Output(0) } -// Computes the gradient of the sigmoid of `x` wrt its input. -// -// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and -// `dy` is the corresponding input gradient. -func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { +// Produces a summary of any statistics recorded by the given statistics manager. +func ExperimentalStatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SigmoidGrad", + Type: "ExperimentalStatsAggregatorSummary", Input: []tf.Input{ - y, dy, + iterator, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Convert one or more images from HSV to RGB. -// -// Outputs a tensor of the same shape as the `images` tensor, containing the RGB -// value of the pixels. The output is only well defined if the value in `images` -// are in `[0,1]`. -// -// See `rgb_to_hsv` for a description of the HSV encoding. +// Makes a new iterator from the given `dataset` and stores it in `iterator`. // -// Arguments: -// images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3. +// This operation may be executed multiple times. Each execution will reset the +// iterator in `iterator` to the first element of `dataset`. // -// Returns `images` converted to RGB. -func HSVToRGB(scope *Scope, images tf.Output) (output tf.Output) { +// Returns the created operation. +func MakeIterator(scope *Scope, dataset tf.Output, iterator tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "HSVToRGB", + Type: "MakeIterator", Input: []tf.Input{ - images, + dataset, iterator, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Creates a dataset by applying optimizations to `input_dataset`. +// Component-wise divides a SparseTensor by a dense Tensor. 
// -// Creates a dataset by applying optimizations to `input_dataset`. +// *Limitation*: this Op only broadcasts the dense side to the sparse side, but not +// the other direction. // // Arguments: -// input_dataset: A variant tensor representing the input dataset. -// optimizations: A `tf.string` vector `tf.Tensor` identifying optimizations to use. -// +// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// dense: `R`-D. The dense Tensor operand. // -func OptimizeDataset(scope *Scope, input_dataset tf.Output, optimizations tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns 1-D. The `N` values that are operated on. +func SparseDenseCwiseDiv(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "OptimizeDataset", + Type: "SparseDenseCwiseDiv", Input: []tf.Input{ - input_dataset, optimizations, + sp_indices, sp_values, sp_shape, dense, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the element-wise min of two SparseTensors. -// -// Assumes the two SparseTensors have the same shape, i.e., no broadcasting. +// Creates a dataset that batches and pads `batch_size` elements from the input. // // Arguments: -// a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a -// SparseTensor, in the canonical lexicographic ordering. -// a_values: 1-D. `N` non-empty values corresponding to `a_indices`. -// a_shape: 1-D. Shape of the input SparseTensor. -// b_indices: counterpart to `a_indices` for the other operand. 
-// b_values: counterpart to `a_values` for the other operand; must be of the same dtype. -// b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. // -// Returns 2-D. The indices of the output SparseTensor.1-D. The values of the output SparseTensor. -func SparseSparseMinimum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { +// batch_size: A scalar representing the number of elements to accumulate in a +// batch. +// padded_shapes: A list of int64 tensors representing the desired padded shapes +// of the corresponding output components. These shapes may be partially +// specified, using `-1` to indicate that a particular dimension should be +// padded to the maximum size of all batch elements. +// padding_values: A list of scalars containing the padding value to use for +// each of the outputs. +// +func PaddedBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, padded_shapes []tf.Output, padding_values []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "SparseSparseMinimum", + Type: "PaddedBatchDataset", Input: []tf.Input{ - a_indices, a_values, a_shape, b_indices, b_values, b_shape, + input_dataset, batch_size, tf.OutputList(padded_shapes), tf.OutputList(padding_values), }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// ResourceApplyAdamWithAmsgradAttr is an optional argument to ResourceApplyAdamWithAmsgrad. -type ResourceApplyAdamWithAmsgradAttr func(optionalAttr) +// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. 
+type ResourceApplyMomentumAttr func(optionalAttr) -// ResourceApplyAdamWithAmsgradUseLocking sets the optional use_locking attribute to value. +// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. // -// value: If `True`, updating of the var, m, and v tensors will be protected +// value: If `True`, updating of the var and accum tensors will be protected // by a lock; otherwise the behavior is undefined, but may exhibit less // contention. // If not specified, defaults to false -func ResourceApplyAdamWithAmsgradUseLocking(value bool) ResourceApplyAdamWithAmsgradAttr { +func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { return func(m optionalAttr) { m["use_locking"] = value } } -// Update '*var' according to the Adam algorithm. +// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. // -// $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ -// $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ -// $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ -// $$vhat_t := max{vhat_{t-1}, v_t}$$ -// $$variable := variable - lr_t * m_t / (\sqrt{vhat_t} + \epsilon)$$ +// value: If `True`, the tensor passed to compute grad will be +// var - lr * momentum * accum, so in the end, the var you get is actually +// var - lr * momentum * accum. +// If not specified, defaults to false +func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { + return func(m optionalAttr) { + m["use_nesterov"] = value + } +} + +// Update '*var' according to the momentum scheme. Set use_nesterov = True if you +// +// want to use Nesterov momentum. +// +// accum = accum * momentum + grad +// var -= lr * accum // // Arguments: // var_: Should be from a Variable(). -// m: Should be from a Variable(). -// v: Should be from a Variable(). -// vhat: Should be from a Variable(). -// beta1_power: Must be a scalar. -// beta2_power: Must be a scalar. +// accum: Should be from a Variable(). 
// lr: Scaling factor. Must be a scalar. -// beta1: Momentum factor. Must be a scalar. -// beta2: Momentum factor. Must be a scalar. -// epsilon: Ridge term. Must be a scalar. // grad: The gradient. +// momentum: Momentum. Must be a scalar. // // Returns the created operation. -func ResourceApplyAdamWithAmsgrad(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, vhat tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamWithAmsgradAttr) (o *tf.Operation) { +func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -23348,146 +22943,245 @@ func ResourceApplyAdamWithAmsgrad(scope *Scope, var_ tf.Output, m tf.Output, v t a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyAdamWithAmsgrad", + Type: "ResourceApplyMomentum", Input: []tf.Input{ - var_, m, v, vhat, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, + var_, accum, lr, grad, momentum, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. -type MapUnstageNoKeyAttr func(optionalAttr) +// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad. +type MaxPoolGradGradAttr func(optionalAttr) -// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 +// MaxPoolGradGradDataFormat sets the optional data_format attribute to value. // -// REQUIRES: value >= 0 -func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. 
+// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr { return func(m optionalAttr) { - m["capacity"] = value + m["data_format"] = value } } -// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 +// Computes second-order gradients of the maxpooling function. // -// REQUIRES: value >= 0 -func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["memory_limit"] = value +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns Gradients of gradients w.r.t. the input to `max_pool`. +func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) { + if scope.Err() != nil { + return } -} - -// MapUnstageNoKeyContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["container"] = value + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) } -} - -// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["shared_name"] = value + opspec := tf.OpSpec{ + Type: "MaxPoolGradGrad", + Input: []tf.Input{ + orig_input, orig_output, grad, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Op removes and returns a random (key, value) +// Returns the last element of the input list as well as a list with all but that element. // -// from the underlying container. If the underlying container -// does not contain elements, the op will block until it does. -func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { +// Fails if the list is empty. +// +// input_handle: the input list +// tensor: the withdrawn last element of the list +// element_dtype: the type of elements in the list +// element_shape: the shape of the output tensor +func TensorListPopBack(scope *Scope, input_handle tf.Output, element_shape tf.Output, element_dtype tf.DataType) (output_handle tf.Output, tensor tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"element_dtype": element_dtype} opspec := tf.OpSpec{ - Type: "MapUnstageNoKey", + Type: "TensorListPopBack", Input: []tf.Input{ - indices, + input_handle, element_shape, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Determine the script codes of a given tensor of Unicode integer code points. +// +// This operation converts Unicode code points to script codes corresponding to +// each code point. Script codes correspond to International Components for +// Unicode (ICU) UScriptCode values. See http://icu-project.org/apiref/icu4c/uscript_8h.html. +// Returns -1 (USCRIPT_INVALID_CODE) for invalid codepoints. 
Output shape will +// match input shape. +// +// Arguments: +// input: A Tensor of int32 Unicode code points. +// +// Returns A Tensor of int32 script codes corresponding to each input code point. +func UnicodeScript(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - key = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapUnstageNoKey", err) - return + opspec := tf.OpSpec{ + Type: "UnicodeScript", + Input: []tf.Input{ + input, + }, } - return key, values + op := scope.AddOperation(opspec) + return op.Output(0) } -// HashTableV2Attr is an optional argument to HashTableV2. -type HashTableV2Attr func(optionalAttr) +// Creates a sequence of numbers. +// +// This operation creates a sequence of numbers that begins at `start` and +// extends by increments of `delta` up to but not including `limit`. +// +// For example: +// +// ``` +// # 'start' is 3 +// # 'limit' is 18 +// # 'delta' is 3 +// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] +// ``` +// +// Arguments: +// start: 0-D (scalar). First entry in the sequence. +// limit: 0-D (scalar). Upper limit of sequence, exclusive. +// delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`. +// +// Returns 1-D. +func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Range", + Input: []tf.Input{ + start, limit, delta, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// HashTableV2Container sets the optional container attribute to value. +// Computes second-order gradients of the maxpooling function. // -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. 
-// If not specified, defaults to "" -func HashTableV2Container(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["container"] = value +// Arguments: +// input: The original input. +// grad: 4-D with shape `[batch, height, width, channels]`. Gradients w.r.t. the +// input of `max_pool`. +// argmax: The indices of the maximum values chosen for each output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns Gradients of gradients w.r.t. the input of `max_pool`. +func MaxPoolGradGradWithArgmax(scope *Scope, input tf.Output, grad tf.Output, argmax tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + opspec := tf.OpSpec{ + Type: "MaxPoolGradGradWithArgmax", + Input: []tf.Input{ + input, grad, argmax, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// HashTableV2SharedName sets the optional shared_name attribute to value. +// Return a slice from 'input'. // -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func HashTableV2SharedName(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value +// The output tensor is a tensor with dimensions described by 'size' +// whose values are extracted from 'input' starting at the offsets in +// 'begin'. +// +// *Requirements*: +// 0 <= begin[i] <= begin[i] + size[i] <= Di for i in [0, n) +// +// Arguments: +// +// begin: begin[i] specifies the offset into the 'i'th dimension of +// 'input' to slice from. +// size: size[i] specifies the number of elements of the 'i'th dimension +// of 'input' to slice. 
If size[i] is -1, all remaining elements in dimension +// i are included in the slice (i.e. this is equivalent to setting +// size[i] = input.dim_size(i) - begin[i]). +func Slice(scope *Scope, input tf.Output, begin tf.Output, size tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Slice", + Input: []tf.Input{ + input, begin, size, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). // -// value: If true and shared_name is empty, the table is shared -// using the node name. -// If not specified, defaults to false -func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value +// The Hurwitz zeta function is defined as: +// +// +// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) +func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Zeta", + Input: []tf.Input{ + x, q, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Creates a non-initialized hash table. +// Returns the cardinality of `input_dataset`. // -// This op creates a hash table, specifying the type of its keys and values. -// Before using the table you will have to initialize it. After initialization the -// table will be immutable. +// Returns the cardinality of `input_dataset`. // // Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. +// input_dataset: A variant tensor representing the dataset to return cardinality for. // -// Returns Handle to a table. -func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { +// Returns The cardinality of `input_dataset`. 
Named constants are used to represent +// infinite and unknown cardinality. +func ExperimentalDatasetCardinality(scope *Scope, input_dataset tf.Output) (cardinality tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "HashTableV2", - - Attrs: attrs, + Type: "ExperimentalDatasetCardinality", + Input: []tf.Input{ + input_dataset, + }, } op := scope.AddOperation(opspec) return op.Output(0) @@ -23658,462 +23352,527 @@ func ResourceSparseApplyKerasMomentum(scope *Scope, var_ tf.Output, accum tf.Out return scope.AddOperation(opspec) } -// Assigns a new value to a variable. +// ResourceApplyAdamWithAmsgradAttr is an optional argument to ResourceApplyAdamWithAmsgrad. +type ResourceApplyAdamWithAmsgradAttr func(optionalAttr) + +// ResourceApplyAdamWithAmsgradUseLocking sets the optional use_locking attribute to value. // -// Any ReadVariableOp with a control dependency on this op is guaranteed to return -// this value or a subsequent newer value of the variable. +// value: If `True`, updating of the var, m, and v tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAdamWithAmsgradUseLocking(value bool) ResourceApplyAdamWithAmsgradAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the Adam algorithm. +// +// $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ +// $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ +// $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ +// $$vhat_t := max{vhat_{t-1}, v_t}$$ +// $$variable := variable - lr_t * m_t / (\sqrt{vhat_t} + \epsilon)$$ // // Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value to set the new tensor to use. 
+// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// v: Should be from a Variable(). +// vhat: Should be from a Variable(). +// beta1_power: Must be a scalar. +// beta2_power: Must be a scalar. +// lr: Scaling factor. Must be a scalar. +// beta1: Momentum factor. Must be a scalar. +// beta2: Momentum factor. Must be a scalar. +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. // // Returns the created operation. -func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { +func ResourceApplyAdamWithAmsgrad(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, vhat tf.Output, beta1_power tf.Output, beta2_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdamWithAmsgradAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "AssignVariableOp", + Type: "ResourceApplyAdamWithAmsgrad", Input: []tf.Input{ - resource, value, + var_, m, v, vhat, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, }, + Attrs: attrs, } return scope.AddOperation(opspec) } -// Strip leading and trailing whitespaces from the Tensor. +// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. +type MapUnstageNoKeyAttr func(optionalAttr) + +// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// Arguments: -// input: A string `Tensor` of any shape. +// REQUIRES: value >= 0 +func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Returns A string `Tensor` of the same shape as the input. 
-func StringStrip(scope *Scope, input tf.Output) (output tf.Output) { - if scope.Err() != nil { - return +// REQUIRES: value >= 0 +func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["memory_limit"] = value } - opspec := tf.OpSpec{ - Type: "StringStrip", - Input: []tf.Input{ - input, - }, +} + +// MapUnstageNoKeyContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["container"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns a tensor of ones with the same shape and type as x. -// -// Arguments: -// x: a tensor of type T. +// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes and returns a random (key, value) // -// Returns a tensor of the same shape and type as x but filled with ones. -func OnesLike(scope *Scope, x tf.Output) (y tf.Output) { +// from the underlying container. If the underlying container +// does not contain elements, the op will block until it does. 
+func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "OnesLike", + Type: "MapUnstageNoKey", Input: []tf.Input{ - x, + indices, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + key = op.Output(idx) + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("MapUnstageNoKey", err) + return + } + return key, values } -// The gradient of SparseFillEmptyRows. -// -// Takes vectors reverse_index_map, shaped `[N]`, and grad_values, -// shaped `[N_full]`, where `N_full >= N` and copies data into either -// `d_values` or `d_default_value`. Here `d_values` is shaped `[N]` and -// `d_default_value` is a scalar. -// -// d_values[j] = grad_values[reverse_index_map[j]] -// d_default_value = sum_{k : 0 .. N_full - 1} ( -// grad_values[k] * 1{k not in reverse_index_map}) +// HashTableV2Attr is an optional argument to HashTableV2. +type HashTableV2Attr func(optionalAttr) + +// HashTableV2Container sets the optional container attribute to value. // -// Arguments: -// reverse_index_map: 1-D. The reverse index map from SparseFillEmptyRows. -// grad_values: 1-D. The gradients from backprop. +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func HashTableV2Container(value string) HashTableV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// HashTableV2SharedName sets the optional shared_name attribute to value. // -// Returns 1-D. The backprop into values.0-D. The backprop into default_value. 
-func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) { - if scope.Err() != nil { - return +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func HashTableV2SharedName(value string) HashTableV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value } - opspec := tf.OpSpec{ - Type: "SparseFillEmptyRowsGrad", - Input: []tf.Input{ - reverse_index_map, grad_values, - }, +} + +// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// +// value: If true and shared_name is empty, the table is shared +// using the node name. +// If not specified, defaults to false +func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) } -// Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` +// Creates a non-initialized hash table. // -// if < 0, `scale * features` otherwise. +// This op creates a hash table, specifying the type of its keys and values. +// Before using the table you will have to initialize it. After initialization the +// table will be immutable. // -// To be used together with -// `initializer = tf.variance_scaling_initializer(factor=1.0, mode='FAN_IN')`. -// For correct dropout, use `tf.contrib.nn.alpha_dropout`. +// Arguments: +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. // -// See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) -func Selu(scope *Scope, features tf.Output) (activations tf.Output) { +// Returns Handle to a table. 
+func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Selu", - Input: []tf.Input{ - features, - }, + Type: "HashTableV2", + + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// SetSizeAttr is an optional argument to SetSize. -type SetSizeAttr func(optionalAttr) +// RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingMomentumParametersGradAccumDebug. +type RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr func(optionalAttr) -// SetSizeValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func SetSizeValidateIndices(value bool) SetSizeAttr { +// RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["validate_indices"] = value + m["table_id"] = value } } -// Number of unique elements along last dimension of input `set`. -// -// Input `set` is a `SparseTensor` represented by `set_indices`, `set_values`, -// and `set_shape`. The last dimension contains values in a set, duplicates are -// allowed but ignored. -// -// If `validate_indices` is `True`, this op validates the order and range of `set` -// indices. +// RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableName sets the optional table_name attribute to value. 
+// If not specified, defaults to "" +func RetrieveTPUEmbeddingMomentumParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Retrieve Momentum embedding parameters with debug support. // -// Arguments: -// set_indices: 2D `Tensor`, indices of a `SparseTensor`. -// set_values: 1D `Tensor`, values of a `SparseTensor`. -// set_shape: 1D `Tensor`, shape of a `SparseTensor`. +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // -// Returns For `set` ranked `n`, this is a `Tensor` with rank `n-1`, and the same 1st -// `n-1` dimensions as `set`. Each value is the number of unique elements in -// the corresponding `[0...n-1]` dimension of `set`. -func SetSize(scope *Scope, set_indices tf.Output, set_values tf.Output, set_shape tf.Output, optional ...SetSizeAttr) (size tf.Output) { +// Returns Parameter parameters updated by the Momentum optimization algorithm.Parameter momenta updated by the Momentum optimization algorithm.Parameter gradient_accumulators updated by the Momentum optimization algorithm. 
+func RetrieveTPUEmbeddingMomentumParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMomentumParametersGradAccumDebugAttr) (parameters tf.Output, momenta tf.Output, gradient_accumulators tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SetSize", - Input: []tf.Input{ - set_indices, set_values, set_shape, - }, + Type: "RetrieveTPUEmbeddingMomentumParametersGradAccumDebug", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Adds sparse `updates` to an existing tensor according to `indices`. -// -// This operation creates a new tensor by adding sparse `updates` to the passed -// in `tensor`. -// This operation is very similar to `tf.scatter_nd_add`, except that the updates -// are added onto an existing tensor (as opposed to a variable). If the memory -// for the existing tensor cannot be re-used, a copy is made and updated. -// -// `indices` is an integer tensor containing indices into a new tensor of shape -// `shape`. The last dimension of `indices` can be at most the rank of `shape`: +// Enqueue a Tensor on the computation outfeed. // -// indices.shape[-1] <= shape.rank +// Arguments: +// input: A tensor that will be inserted into the outfeed queue. // -// The last dimension of `indices` corresponds to indices into elements -// (if `indices.shape[-1] = shape.rank`) or slices -// (if `indices.shape[-1] < shape.rank`) along dimension `indices.shape[-1]` of -// `shape`. `updates` is a tensor with shape -// -// indices.shape[:-1] + shape[indices.shape[-1]:] -// -// The simplest form of tensor_scatter_add is to add individual elements to a -// tensor by index. For example, say we want to add 4 elements in a rank-1 -// tensor with 8 elements. 
-// -// In Python, this scatter add operation would look like this: -// -// ```python -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// tensor = tf.ones([8], dtype=tf.int32) -// updated = tf.tensor_scatter_add(tensor, indices, updates) -// with tf.Session() as sess: -// print(sess.run(scatter)) -// ``` -// -// The resulting tensor would look like this: -// -// [1, 12, 1, 11, 10, 1, 1, 13] -// -// We can also, insert entire slices of a higher rank tensor all at once. For -// example, if we wanted to insert two slices in the first dimension of a -// rank-3 tensor with two matrices of new values. -// -// In Python, this scatter add operation would look like this: -// -// ```python -// indices = tf.constant([[0], [2]]) -// updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]], -// [[5, 5, 5, 5], [6, 6, 6, 6], -// [7, 7, 7, 7], [8, 8, 8, 8]]]) -// tensor = tf.ones([4, 4, 4]) -// updated = tf.tensor_scatter_add(tensor, indices, updates) -// with tf.Session() as sess: -// print(sess.run(scatter)) -// ``` -// -// The resulting tensor would look like this: -// -// [[[6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9]], -// [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]], -// [[6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8], [9, 9, 9, 9]], -// [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]] -// -// Note that on CPU, if an out of bound index is found, an error is returned. -// On GPU, if an out of bound index is found, the index is ignored. -// -// Arguments: -// tensor: Tensor to copy/update. -// indices: Index tensor. -// updates: Updates to scatter into output. -// -// Returns A new tensor copied from tensor and updates added according to the indices. -func TensorScatterAdd(scope *Scope, tensor tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) { +// Returns the created operation. 
+func OutfeedEnqueue(scope *Scope, input tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorScatterAdd", + Type: "OutfeedEnqueue", Input: []tf.Input{ - tensor, indices, updates, + input, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes the sign and the log of the absolute value of the determinant of +// Outputs a `Summary` protocol buffer with a histogram. // -// one or more square matrices. +// The generated +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// has one summary value containing a histogram for `values`. // -// The input is a tensor of shape `[N, M, M]` whose inner-most 2 dimensions -// form square matrices. The outputs are two tensors containing the signs and -// absolute values of the log determinants for all N input submatrices -// `[..., :, :]` such that the determinant = sign*exp(log_abs_determinant). -// The log_abs_determinant is computed as det(P)*sum(log(diag(LU))) where LU -// is the LU decomposition of the input and P is the corresponding -// permutation matrix. +// This op reports an `InvalidArgument` error if any value is not finite. // // Arguments: -// input: Shape is `[N, M, M]`. +// tag: Scalar. Tag to use for the `Summary.Value`. +// values: Any shape. Values to use to build the histogram. // -// Returns The signs of the log determinants of the inputs. Shape is `[N]`.The logs of the absolute values of the determinants -// of the N input matrices. Shape is `[N]`. -func LogMatrixDeterminant(scope *Scope, input tf.Output) (sign tf.Output, log_abs_determinant tf.Output) { +// Returns Scalar. Serialized `Summary` protocol buffer. 
+func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LogMatrixDeterminant", + Type: "HistogramSummary", Input: []tf.Input{ - input, + tag, values, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Copy a tensor setting everything outside a central band in each innermost matrix -// -// to zero. -// -// The `band` part is computed as follows: -// Assume `input` has `k` dimensions `[I, J, K, ..., M, N]`, then the output is a -// tensor with the same shape where -// -// `band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n]`. -// -// The indicator function +// MutableDenseHashTableV2Attr is an optional argument to MutableDenseHashTableV2. +type MutableDenseHashTableV2Attr func(optionalAttr) + +// MutableDenseHashTableV2Container sets the optional container attribute to value. // -// `in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && -// (num_upper < 0 || (n-m) <= num_upper)`. +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func MutableDenseHashTableV2Container(value string) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MutableDenseHashTableV2SharedName sets the optional shared_name attribute to value. // -// For example: +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func MutableDenseHashTableV2SharedName(value string) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// MutableDenseHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. 
+// If not specified, defaults to false +func MutableDenseHashTableV2UseNodeNameSharing(value bool) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value + } +} + +// MutableDenseHashTableV2ValueShape sets the optional value_shape attribute to value. // -// ``` -// # if 'input' is [[ 0, 1, 2, 3] -// [-1, 0, 1, 2] -// [-2, -1, 0, 1] -// [-3, -2, -1, 0]], +// value: The shape of each value. +// If not specified, defaults to <> +func MutableDenseHashTableV2ValueShape(value tf.Shape) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["value_shape"] = value + } +} + +// MutableDenseHashTableV2InitialNumBuckets sets the optional initial_num_buckets attribute to value. // -// tf.matrix_band_part(input, 1, -1) ==> [[ 0, 1, 2, 3] -// [-1, 0, 1, 2] -// [ 0, -1, 0, 1] -// [ 0, 0, -1, 0]], +// value: The initial number of hash table buckets. Must be a power +// to 2. +// If not specified, defaults to 131072 +func MutableDenseHashTableV2InitialNumBuckets(value int64) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["initial_num_buckets"] = value + } +} + +// MutableDenseHashTableV2MaxLoadFactor sets the optional max_load_factor attribute to value. // -// tf.matrix_band_part(input, 2, 1) ==> [[ 0, 1, 0, 0] -// [-1, 0, 1, 0] -// [-2, -1, 0, 1] -// [ 0, -2, -1, 0]] -// ``` +// value: The maximum ratio between number of entries and number of +// buckets before growing the table. Must be between 0 and 1. +// If not specified, defaults to 0.8 +func MutableDenseHashTableV2MaxLoadFactor(value float32) MutableDenseHashTableV2Attr { + return func(m optionalAttr) { + m["max_load_factor"] = value + } +} + +// Creates an empty hash table that uses tensors as the backing store. // -// Useful special cases: +// It uses "open addressing" with quadratic reprobing to resolve +// collisions. // -// ``` -// tf.matrix_band_part(input, 0, -1) ==> Upper triangular part. 
-// tf.matrix_band_part(input, -1, 0) ==> Lower triangular part. -// tf.matrix_band_part(input, 0, 0) ==> Diagonal. -// ``` +// This op creates a mutable hash table, specifying the type of its keys and +// values. Each value must be a scalar. Data can be inserted into the table using +// the insert operations. It does not support the initialization operation. // // Arguments: -// input: Rank `k` tensor. -// num_lower: 0-D tensor. Number of subdiagonals to keep. If negative, keep entire -// lower triangle. -// num_upper: 0-D tensor. Number of superdiagonals to keep. If negative, keep -// entire upper triangle. +// empty_key: The key used to represent empty key buckets internally. Must not +// be used in insert or lookup operations. // -// Returns Rank `k` tensor of the same shape as input. The extracted banded tensor. -func MatrixBandPart(scope *Scope, input tf.Output, num_lower tf.Output, num_upper tf.Output) (band tf.Output) { +// value_dtype: Type of the table values. +// +// Returns Handle to a table. +func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, deleted_key tf.Output, value_dtype tf.DataType, optional ...MutableDenseHashTableV2Attr) (table_handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"value_dtype": value_dtype} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "MatrixBandPart", + Type: "MutableDenseHashTableV2", Input: []tf.Input{ - input, num_lower, num_upper, + empty_key, deleted_key, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Subtracts a value from the current value of a variable. -// -// Any ReadVariableOp with a control dependency on this op is guaranteed to -// see the decremented value or a subsequent newer one. -// -// Arguments: -// resource: handle to the resource in which to store the variable. -// value: the value by which the variable will be incremented. 
+// RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingADAMParametersGradAccumDebug. +type RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr func(optionalAttr) + +// RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// Returns the created operation. -func AssignSubVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AssignSubVariableOp", - Input: []tf.Input{ - resource, value, - }, +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_id"] = value } - return scope.AddOperation(opspec) } -// RestoreAttr is an optional argument to Restore. -type RestoreAttr func(optionalAttr) - -// RestorePreferredShard sets the optional preferred_shard attribute to value. -// -// value: Index of file to open first if multiple files match -// `file_pattern`. -// If not specified, defaults to -1 -func RestorePreferredShard(value int64) RestoreAttr { +// RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingADAMParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["preferred_shard"] = value + m["table_name"] = value } } -// Restores a tensor from checkpoint files. -// -// Reads a tensor stored in one or several files. If there are several files (for -// instance because a tensor was saved as slices), `file_pattern` may contain -// wildcard symbols (`*` and `?`) in the filename portion only, not in the -// directory portion. 
-// -// If a `file_pattern` matches several files, `preferred_shard` can be used to hint -// in which file the requested tensor is likely to be found. This op will first -// open the file at index `preferred_shard` in the list of matching files and try -// to restore tensors from that file. Only if some tensors or tensor slices are -// not found in that first file, then the Op opens all the files. Setting -// `preferred_shard` to match the value passed as the `shard` input -// of a matching `Save` Op may speed up Restore. This attribute only affects -// performance, not correctness. The default value -1 means files are processed in -// order. -// -// See also `RestoreSlice`. +// Retrieve ADAM embedding parameters with debug support. // -// Arguments: -// file_pattern: Must have a single element. The pattern of the files from -// which we read the tensor. -// tensor_name: Must have a single element. The name of the tensor to be -// restored. -// dt: The type of the tensor to be restored. +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // -// Returns The restored tensor. -func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf.DataType, optional ...RestoreAttr) (tensor tf.Output) { +// Returns Parameter parameters updated by the ADAM optimization algorithm.Parameter momenta updated by the ADAM optimization algorithm.Parameter velocities updated by the ADAM optimization algorithm.Parameter gradient_accumulators updated by the ADAM optimization algorithm. 
+func RetrieveTPUEmbeddingADAMParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingADAMParametersGradAccumDebugAttr) (parameters tf.Output, momenta tf.Output, velocities tf.Output, gradient_accumulators tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dt": dt} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Restore", - Input: []tf.Input{ - file_pattern, tensor_name, - }, + Type: "RetrieveTPUEmbeddingADAMParametersGradAccumDebug", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// QuantizedResizeBilinearAttr is an optional argument to QuantizedResizeBilinear. -type QuantizedResizeBilinearAttr func(optionalAttr) +// CudnnRNNAttr is an optional argument to CudnnRNN. +type CudnnRNNAttr func(optionalAttr) -// QuantizedResizeBilinearAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func QuantizedResizeBilinearAlignCorners(value bool) QuantizedResizeBilinearAttr { +// CudnnRNNRnnMode sets the optional rnn_mode attribute to value. +// If not specified, defaults to "lstm" +func CudnnRNNRnnMode(value string) CudnnRNNAttr { return func(m optionalAttr) { - m["align_corners"] = value + m["rnn_mode"] = value } } -// Resize quantized `images` to `size` using quantized bilinear interpolation. -// -// Input images and output images must be quantized types. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. 
-// +// CudnnRNNInputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNInputMode(value string) CudnnRNNAttr { + return func(m optionalAttr) { + m["input_mode"] = value + } +} + +// CudnnRNNDirection sets the optional direction attribute to value. +// If not specified, defaults to "unidirectional" +func CudnnRNNDirection(value string) CudnnRNNAttr { + return func(m optionalAttr) { + m["direction"] = value + } +} + +// CudnnRNNDropout sets the optional dropout attribute to value. +// If not specified, defaults to 0 +func CudnnRNNDropout(value float32) CudnnRNNAttr { + return func(m optionalAttr) { + m["dropout"] = value + } +} + +// CudnnRNNSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func CudnnRNNSeed(value int64) CudnnRNNAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// CudnnRNNSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNSeed2(value int64) CudnnRNNAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// CudnnRNNIsTraining sets the optional is_training attribute to value. +// If not specified, defaults to true +func CudnnRNNIsTraining(value bool) CudnnRNNAttr { + return func(m optionalAttr) { + m["is_training"] = value + } +} + +// A RNN backed by cuDNN. // +// Computes the RNN from the input and initial states, with respect to the params +// buffer. // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min tf.Output, max tf.Output, optional ...QuantizedResizeBilinearAttr) (resized_images tf.Output, out_min tf.Output, out_max tf.Output) { +// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicate whether there is a linear projection between the input and +// the actual computation before the first layer. 
'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. +// direction: Indicates whether a bidirectional model will be used. Should be +// "unidirectional" or "bidirectional". +// dropout: Dropout probability. When set to 0., dropout is disabled. +// seed: The 1st part of a seed to initialize dropout. +// seed2: The 2nd part of a seed to initialize dropout. +// input: A 3-D tensor with the shape of [seq_length, batch_size, input_size]. +// input_h: A 3-D tensor with the shape of [num_layer * dir, batch_size, +// num_units]. +// input_c: For LSTM, a 3-D tensor with the shape of +// [num_layer * dir, batch, num_units]. For other models, it is ignored. +// params: A 1-D tensor that contains the weights and biases in an opaque layout. +// The size must be created through CudnnRNNParamsSize, and initialized +// separately. Note that they might not be compatible across different +// generations. So it is a good idea to save and restore +// output: A 3-D tensor with the shape of [seq_length, batch_size, +// dir * num_units]. +// output_h: The same shape has input_h. +// output_c: The same shape as input_c for LSTM. An empty tensor for other models. +// is_training: Indicates whether this operation is used for inferenece or +// training. +// reserve_space: An opaque tensor that can be used in backprop calculation. It +// is only produced if is_training is false. 
+func CudnnRNN(scope *Scope, input tf.Output, input_h tf.Output, input_c tf.Output, params tf.Output, optional ...CudnnRNNAttr) (output tf.Output, output_h tf.Output, output_c tf.Output, reserve_space tf.Output) { if scope.Err() != nil { return } @@ -24122,32 +23881,56 @@ func QuantizedResizeBilinear(scope *Scope, images tf.Output, size tf.Output, min a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedResizeBilinear", + Type: "CudnnRNN", Input: []tf.Input{ - images, size, min, max, + input, input_h, input_c, params, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// Creates a dataset that uses a custom thread pool to compute `input_dataset`. +// DecodeCompressedAttr is an optional argument to DecodeCompressed. +type DecodeCompressedAttr func(optionalAttr) + +// DecodeCompressedCompressionType sets the optional compression_type attribute to value. // -// Arguments: +// value: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// If not specified, defaults to "" +func DecodeCompressedCompressionType(value string) DecodeCompressedAttr { + return func(m optionalAttr) { + m["compression_type"] = value + } +} + +// Decompress strings. // -// num_threads: Identifies the number of threads to use for the private threadpool. +// This op decompresses each element of the `bytes` input `Tensor`, which +// is assumed to be compressed using the given `compression_type`. // +// The `output` is a string `Tensor` of the same shape as `bytes`, +// each element containing the decompressed data from the corresponding +// element in `bytes`. // -func ExperimentalPrivateThreadPoolDataset(scope *Scope, input_dataset tf.Output, num_threads tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Arguments: +// bytes: A Tensor of string which is compressed. 
+// +// Returns A Tensor with the same shape as input `bytes`, uncompressed +// from bytes. +func DecodeCompressed(scope *Scope, bytes tf.Output, optional ...DecodeCompressedAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ExperimentalPrivateThreadPoolDataset", + Type: "DecodeCompressed", Input: []tf.Input{ - input_dataset, num_threads, + bytes, }, Attrs: attrs, } @@ -24155,54 +23938,54 @@ func ExperimentalPrivateThreadPoolDataset(scope *Scope, input_dataset tf.Output, return op.Output(0) } -// ExperimentalParseExampleDatasetAttr is an optional argument to ExperimentalParseExampleDataset. -type ExperimentalParseExampleDatasetAttr func(optionalAttr) +// EnterAttr is an optional argument to Enter. +type EnterAttr func(optionalAttr) -// ExperimentalParseExampleDatasetSloppy sets the optional sloppy attribute to value. +// EnterIsConstant sets the optional is_constant attribute to value. +// +// value: If true, the output is constant within the child frame. // If not specified, defaults to false -func ExperimentalParseExampleDatasetSloppy(value bool) ExperimentalParseExampleDatasetAttr { +func EnterIsConstant(value bool) EnterAttr { return func(m optionalAttr) { - m["sloppy"] = value + m["is_constant"] = value } } -// Transforms `input_dataset` containing `Example` protos as vectors of DT_STRING into a dataset of `Tensor` or `SparseTensor` objects representing the parsed features. +// EnterParallelIterations sets the optional parallel_iterations attribute to value. // -// Arguments: +// value: The number of iterations allowed to run in parallel. 
+// If not specified, defaults to 10 +func EnterParallelIterations(value int64) EnterAttr { + return func(m optionalAttr) { + m["parallel_iterations"] = value + } +} + +// Creates or finds a child frame, and makes `data` available to the child frame. // +// This op is used together with `Exit` to create loops in the graph. +// The unique `frame_name` is used by the `Executor` to identify frames. If +// `is_constant` is true, `output` is a constant in the child frame; otherwise +// it may be changed in the child frame. At most `parallel_iterations` iterations +// are run in parallel in the child frame. // -// dense_defaults: A dict mapping string keys to `Tensor`s. -// The keys of the dict must match the dense_keys of the feature. -// sparse_keys: A list of string keys in the examples features. -// The results for these keys will be returned as `SparseTensor` objects. -// dense_keys: A list of Ndense string Tensors (scalars). -// The keys expected in the Examples features associated with dense values. -// sparse_types: A list of `DTypes` of the same length as `sparse_keys`. -// Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), -// and `tf.string` (`BytesList`) are supported. -// dense_shapes: List of tuples with the same length as `dense_keys`. -// The shape of the data for each dense feature referenced by `dense_keys`. -// Required for any input tensors identified by `dense_keys`. Must be -// either fully defined, or may contain an unknown first dimension. -// An unknown first dimension means the feature is treated as having -// a variable number of blocks, and the output shape along this dimension -// is considered unknown at graph build time. Padding is applied for -// minibatch elements smaller than the maximum number of blocks for the -// given feature along this dimension. -// output_types: The type list for the return values. -// output_shapes: The list of shapes being produced. 
-func ExperimentalParseExampleDataset(scope *Scope, input_dataset tf.Output, num_parallel_calls tf.Output, dense_defaults []tf.Output, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ExperimentalParseExampleDatasetAttr) (handle tf.Output) { +// Arguments: +// data: The tensor to be made available to the child frame. +// frame_name: The name of the child frame. +// +// Returns The same tensor as `data`. +func Enter(scope *Scope, data tf.Output, frame_name string, optional ...EnterAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes, "output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"frame_name": frame_name} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ExperimentalParseExampleDataset", + Type: "Enter", Input: []tf.Input{ - input_dataset, num_parallel_calls, tf.OutputList(dense_defaults), + data, }, Attrs: attrs, } @@ -24210,364 +23993,464 @@ func ExperimentalParseExampleDataset(scope *Scope, input_dataset tf.Output, num_ return op.Output(0) } -// SdcaOptimizerAttr is an optional argument to SdcaOptimizer. -type SdcaOptimizerAttr func(optionalAttr) +// TryRpcAttr is an optional argument to TryRpc. +type TryRpcAttr func(optionalAttr) -// SdcaOptimizerAdaptative sets the optional adaptative attribute to value. +// TryRpcProtocol sets the optional protocol attribute to value. // -// value: Whether to use Adaptive SDCA for the inner loop. -// If not specified, defaults to true -func SdcaOptimizerAdaptative(value bool) SdcaOptimizerAttr { +// value: RPC protocol to use. Empty string means use the default protocol. +// Options include 'grpc'. 
+// If not specified, defaults to "" +func TryRpcProtocol(value string) TryRpcAttr { return func(m optionalAttr) { - m["adaptative"] = value + m["protocol"] = value } } -// Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for +// TryRpcFailFast sets the optional fail_fast attribute to value. // -// linear models with L1 + L2 regularization. As global optimization objective is -// strongly-convex, the optimizer optimizes the dual objective at each step. The -// optimizer applies each update one example at a time. Examples are sampled -// uniformly, and the optimizer is learning rate free and enjoys linear convergence -// rate. +// value: `boolean`. If `true` (default), then failures to connect +// (i.e., the server does not immediately respond) cause an RPC failure. +// If not specified, defaults to true +func TryRpcFailFast(value bool) TryRpcAttr { + return func(m optionalAttr) { + m["fail_fast"] = value + } +} + +// TryRpcTimeoutInMs sets the optional timeout_in_ms attribute to value. // -// [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).
-// Shai Shalev-Shwartz, Tong Zhang. 2012 +// value: `int`. If `0` (default), then the kernel will run the RPC +// request and only time out if the RPC deadline passes or the session times out. +// If this value is greater than `0`, then the op will raise an exception if +// the RPC takes longer than `timeout_in_ms`. +// If not specified, defaults to 0 +func TryRpcTimeoutInMs(value int64) TryRpcAttr { + return func(m optionalAttr) { + m["timeout_in_ms"] = value + } +} + +// Perform batches of RPC requests. // -// $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$ +// This op asynchronously performs either a single RPC request, or a batch +// of requests. RPC requests are defined by three main parameters: // -// [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).
-// Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan, -// Peter Richtarik, Martin Takac. 2015 +// - `address` (the host+port or BNS address of the request) +// - `method` (the method name for the request) +// - `request` (the serialized proto string, or vector of strings, +// of the RPC request argument). // -// [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).
-// Dominik Csiba, Zheng Qu, Peter Richtarik. 2015 +// For example, if you have an RPC service running on port localhost:2345, +// and its interface is configured with the following proto declaration: +// +// ``` +// service MyService { +// rpc MyMethod(MyRequestProto) returns (MyResponseProto) { +// } +// }; +// ``` +// +// then call this op with arguments: +// +// ``` +// address = "localhost:2345" +// method = "MyService/MyMethod" +// ``` +// +// The `request` tensor is a string tensor representing serialized `MyRequestProto` +// strings; and the output string tensor `response` will have the same shape +// and contain (upon successful completion) corresponding serialized +// `MyResponseProto` strings. +// +// For example, to send a single, empty, `MyRequestProto`, call +// this op with `request = ""`. To send 5 **parallel** empty requests, +// call this op with `request = ["", "", "", "", ""]`. +// +// More generally, one can create a batch of `MyRequestProto` serialized protos +// from regular batched tensors using the `encode_proto` op, and convert +// the response `MyResponseProto` serialized protos to batched tensors +// using the `decode_proto` op. +// +// **NOTE** Working with serialized proto strings is faster than instantiating +// actual proto objects in memory, so no performance degradation is expected +// compared to writing custom kernels for this workflow. +// +// Unlike the standard `Rpc` op, if the connection fails or the remote worker +// returns an error status, this op does **not** reraise the exception. +// Instead, the `status_code` and `status_message` entry for the corresponding RPC +// call is set with the error returned from the RPC call. The `response` tensor +// will contain valid response values for those minibatch entries whose RPCs did +// not fail; the rest of the entries will have empty strings. // // Arguments: -// sparse_example_indices: a list of vectors which contain example indices. 
-// sparse_feature_indices: a list of vectors which contain feature indices. -// sparse_feature_values: a list of vectors which contains feature value -// associated with each feature group. -// dense_features: a list of matrices which contains the dense feature values. -// example_weights: a vector which contains the weight associated with each -// example. -// example_labels: a vector which contains the label/target associated with each -// example. -// sparse_indices: a list of vectors where each value is the indices which has -// corresponding weights in sparse_weights. This field maybe omitted for the -// dense approach. -// sparse_weights: a list of vectors where each value is the weight associated with -// a sparse feature group. -// dense_weights: a list of vectors where the values are the weights associated -// with a dense feature group. -// example_state_data: a list of vectors containing the example state data. -// loss_type: Type of the primal loss. Currently SdcaSolver supports logistic, -// squared and hinge losses. -// l1: Symmetric l1 regularization strength. -// l2: Symmetric l2 regularization strength. -// num_loss_partitions: Number of partitions of the global loss function. -// num_inner_iterations: Number of iterations per mini-batch. +// address: `0-D` or `1-D`. The address (i.e. host_name:port) of the RPC server. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `method` and `request`. +// method: `0-D` or `1-D`. The method address on the RPC server. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `address` and `request`. +// request: `0-D` or `1-D`. Serialized proto strings: the rpc request argument. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `address` and `method`. 
// -// Returns a list of vectors containing the updated example state -// data.a list of vectors where each value is the delta -// weights associated with a sparse feature group.a list of vectors where the values are the delta -// weights associated with a dense feature group. -func SdcaOptimizer(scope *Scope, sparse_example_indices []tf.Output, sparse_feature_indices []tf.Output, sparse_feature_values []tf.Output, dense_features []tf.Output, example_weights tf.Output, example_labels tf.Output, sparse_indices []tf.Output, sparse_weights []tf.Output, dense_weights []tf.Output, example_state_data tf.Output, loss_type string, l1 float32, l2 float32, num_loss_partitions int64, num_inner_iterations int64, optional ...SdcaOptimizerAttr) (out_example_state_data tf.Output, out_delta_sparse_weights []tf.Output, out_delta_dense_weights []tf.Output) { +// Returns Same shape as `request`. Serialized proto strings: the rpc responses.Same shape as `request`. Values correspond to tensorflow Status enum codes.Same shape as `request`. Values correspond to Status messages +// returned from the RPC calls. 
+func TryRpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...TryRpcAttr) (response tf.Output, status_code tf.Output, status_message tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"loss_type": loss_type, "l1": l1, "l2": l2, "num_loss_partitions": num_loss_partitions, "num_inner_iterations": num_inner_iterations} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SdcaOptimizer", + Type: "TryRpc", Input: []tf.Input{ - tf.OutputList(sparse_example_indices), tf.OutputList(sparse_feature_indices), tf.OutputList(sparse_feature_values), tf.OutputList(dense_features), example_weights, example_labels, tf.OutputList(sparse_indices), tf.OutputList(sparse_weights), tf.OutputList(dense_weights), example_state_data, + address, method, request, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Add all input tensors element wise. +// +// Arguments: +// inputs: Must all be the same size and shape. +func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - out_example_state_data = op.Output(idx) - if out_delta_sparse_weights, idx, err = makeOutputList(op, idx, "out_delta_sparse_weights"); err != nil { - scope.UpdateErr("SdcaOptimizer", err) - return - } - if out_delta_dense_weights, idx, err = makeOutputList(op, idx, "out_delta_dense_weights"); err != nil { - scope.UpdateErr("SdcaOptimizer", err) - return + opspec := tf.OpSpec{ + Type: "AddN", + Input: []tf.Input{ + tf.OutputList(inputs), + }, } - return out_example_state_data, out_delta_sparse_weights, out_delta_dense_weights + op := scope.AddOperation(opspec) + return op.Output(0) } -// Concats all tensors in the list along the 0th dimension. +// RetrieveTPUEmbeddingMDLAdagradLightParametersAttr is an optional argument to RetrieveTPUEmbeddingMDLAdagradLightParameters. 
+type RetrieveTPUEmbeddingMDLAdagradLightParametersAttr func(optionalAttr) + +// RetrieveTPUEmbeddingMDLAdagradLightParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// Requires that all tensors have the same shape except the first dimension. +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingMDLAdagradLightParametersTableId(value int64) RetrieveTPUEmbeddingMDLAdagradLightParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// RetrieveTPUEmbeddingMDLAdagradLightParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingMDLAdagradLightParametersTableName(value string) RetrieveTPUEmbeddingMDLAdagradLightParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Retrieve MDL Adagrad Light embedding parameters. // -// input_handle: The input list. -// element_shape: The shape of the uninitialized elements in the list. If the first -// dimension is not -1, it is assumed that all list elements have the same -// leading dim. -// leading_dims: The list of leading dims of uninitialized list elements. Used if -// the leading dim of input_handle.element_shape or the element_shape input arg -// is not already set. -// tensor: The concated result. -// lengths: Output tensor containing sizes of the 0th dimension of tensors in the list, used for computing the gradient. +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. 
// -func TensorListConcatV2(scope *Scope, input_handle tf.Output, element_shape tf.Output, leading_dims tf.Output, element_dtype tf.DataType) (tensor tf.Output, lengths tf.Output) { +// Returns Parameter parameters updated by the MDL Adagrad Light optimization algorithm.Parameter accumulators updated by the MDL Adagrad Light optimization algorithm.Parameter weights updated by the MDL Adagrad Light optimization algorithm.Parameter benefits updated by the MDL Adagrad Light optimization algorithm. +func RetrieveTPUEmbeddingMDLAdagradLightParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingMDLAdagradLightParametersAttr) (parameters tf.Output, accumulators tf.Output, weights tf.Output, benefits tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"element_dtype": element_dtype} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TensorListConcatV2", - Input: []tf.Input{ - input_handle, element_shape, leading_dims, - }, + Type: "RetrieveTPUEmbeddingMDLAdagradLightParameters", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// MatrixTriangularSolveAttr is an optional argument to MatrixTriangularSolve. -type MatrixTriangularSolveAttr func(optionalAttr) +// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug. +type RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr func(optionalAttr) -// MatrixTriangularSolveLower sets the optional lower attribute to value. +// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: Boolean indicating whether the innermost matrices in `matrix` are -// lower or upper triangular. 
-// If not specified, defaults to true -func MatrixTriangularSolveLower(value bool) MatrixTriangularSolveAttr { +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["lower"] = value + m["table_id"] = value } } -// MatrixTriangularSolveAdjoint sets the optional adjoint attribute to value. -// -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. -// -// @compatibility(numpy) -// Equivalent to scipy.linalg.solve_triangular -// @end_compatibility -// If not specified, defaults to false -func MatrixTriangularSolveAdjoint(value bool) MatrixTriangularSolveAttr { +// RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["adjoint"] = value + m["table_name"] = value } } -// Solves systems of linear equations with upper or lower triangular matrices by -// -// backsubstitution. -// -// `matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions form -// square matrices. If `lower` is `True` then the strictly upper triangular part -// of each inner-most matrix is assumed to be zero and not accessed. -// If `lower` is False then the strictly lower triangular part of each inner-most -// matrix is assumed to be zero and not accessed. -// `rhs` is a tensor of shape `[..., M, K]`. -// -// The output is a tensor of shape `[..., M, K]`. If `adjoint` is -// `True` then the innermost matrices in `output` satisfy matrix equations -// `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. 
-// If `adjoint` is `False` then the strictly then the innermost matrices in -// `output` satisfy matrix equations -// `adjoint(matrix[..., i, k]) * output[..., k, j] = rhs[..., i, j]`. +// Retrieve Adadelta embedding parameters with debug support. // -// Arguments: -// matrix: Shape is `[..., M, M]`. -// rhs: Shape is `[..., M, K]`. +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // -// Returns Shape is `[..., M, K]`. -func MatrixTriangularSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixTriangularSolveAttr) (output tf.Output) { +// Returns Parameter parameters updated by the Adadelta optimization algorithm.Parameter accumulators updated by the Adadelta optimization algorithm.Parameter updates updated by the Adadelta optimization algorithm.Parameter gradient_accumulators updated by the Adadelta optimization algorithm. +func RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, updates tf.Output, gradient_accumulators tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MatrixTriangularSolve", - Input: []tf.Input{ - matrix, rhs, - }, + Type: "RetrieveTPUEmbeddingAdadeltaParametersGradAccumDebug", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// Saves tensors in V2 checkpoint format. +// MapClearAttr is an optional argument to MapClear. 
+type MapClearAttr func(optionalAttr) + +// MapClearCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// By default, saves the named tensors in full. If the caller wishes to save -// specific slices of full tensors, "shape_and_slices" should be non-empty strings -// and correspondingly well-formed. +// REQUIRES: value >= 0 +func MapClearCapacity(value int64) MapClearAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// MapClearMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Arguments: -// prefix: Must have a single element. The prefix of the V2 checkpoint to which we -// write the tensors. -// tensor_names: shape {N}. The names of the tensors to be saved. -// shape_and_slices: shape {N}. The slice specs of the tensors to be saved. -// Empty strings indicate that they are non-partitioned tensors. -// tensors: `N` tensors to save. +// REQUIRES: value >= 0 +func MapClearMemoryLimit(value int64) MapClearAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// MapClearContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapClearContainer(value string) MapClearAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MapClearSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func MapClearSharedName(value string) MapClearAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes all elements in the underlying container. // // Returns the created operation. 
-func SaveV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, tensors []tf.Output) (o *tf.Operation) { +func MapClear(scope *Scope, dtypes []tf.DataType, optional ...MapClearAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SaveV2", - Input: []tf.Input{ - prefix, tensor_names, shape_and_slices, tf.OutputList(tensors), - }, + Type: "MapClear", + + Attrs: attrs, } return scope.AddOperation(opspec) } -// UnicodeTranscodeAttr is an optional argument to UnicodeTranscode. -type UnicodeTranscodeAttr func(optionalAttr) +// DecodeCSVAttr is an optional argument to DecodeCSV. +type DecodeCSVAttr func(optionalAttr) -// UnicodeTranscodeErrors sets the optional errors attribute to value. +// DecodeCSVFieldDelim sets the optional field_delim attribute to value. // -// value: Error handling policy when there is invalid formatting found in the input. -// The value of 'strict' will cause the operation to produce a InvalidArgument -// error on any invalid input formatting. A value of 'replace' (the default) will -// cause the operation to replace any invalid formatting in the input with the -// `replacement_char` codepoint. A value of 'ignore' will cause the operation to -// skip any invalid formatting in the input and produce no corresponding output -// character. -// If not specified, defaults to "replace" -func UnicodeTranscodeErrors(value string) UnicodeTranscodeAttr { +// value: char delimiter to separate fields in a record. +// If not specified, defaults to "," +func DecodeCSVFieldDelim(value string) DecodeCSVAttr { return func(m optionalAttr) { - m["errors"] = value + m["field_delim"] = value } } -// UnicodeTranscodeReplacementChar sets the optional replacement_char attribute to value. 
-// -// value: The replacement character codepoint to be used in place of any invalid -// formatting in the input when `errors='replace'`. Any valid unicode codepoint may -// be used. The default value is the default unicode replacement character is -// 0xFFFD or U+65533.) +// DecodeCSVUseQuoteDelim sets the optional use_quote_delim attribute to value. // -// Note that for UTF-8, passing a replacement character expressible in 1 byte, such -// as ' ', will preserve string alignment to the source since invalid bytes will be -// replaced with a 1-byte replacement. For UTF-16-BE and UTF-16-LE, any 1 or 2 byte -// replacement character will preserve byte alignment to the source. -// If not specified, defaults to 65533 -func UnicodeTranscodeReplacementChar(value int64) UnicodeTranscodeAttr { +// value: If false, treats double quotation marks as regular +// characters inside of the string fields (ignoring RFC 4180, Section 2, +// Bullet 5). +// If not specified, defaults to true +func DecodeCSVUseQuoteDelim(value bool) DecodeCSVAttr { return func(m optionalAttr) { - m["replacement_char"] = value + m["use_quote_delim"] = value } } -// UnicodeTranscodeReplaceControlCharacters sets the optional replace_control_characters attribute to value. +// DecodeCSVNaValue sets the optional na_value attribute to value. // -// value: Whether to replace the C0 control characters (00-1F) with the -// `replacement_char`. Default is false. -// If not specified, defaults to false -func UnicodeTranscodeReplaceControlCharacters(value bool) UnicodeTranscodeAttr { +// value: Additional string to recognize as NA/NaN. +// If not specified, defaults to "" +func DecodeCSVNaValue(value string) DecodeCSVAttr { return func(m optionalAttr) { - m["replace_control_characters"] = value + m["na_value"] = value } } -// Transcode the input text from a source encoding to a destination encoding. -// -// The input is a string tensor of any shape. 
The output is a string tensor of -// the same shape containing the transcoded strings. Output strings are always -// valid unicode. If the input contains invalid encoding positions, the -// `errors` attribute sets the policy for how to deal with them. If the default -// error-handling policy is used, invalid formatting will be substituted in the -// output by the `replacement_char`. If the errors policy is to `ignore`, any -// invalid encoding positions in the input are skipped and not included in the -// output. If it set to `strict` then any invalid formatting will result in an -// InvalidArgument error. -// -// This operation can be used with `output_encoding = input_encoding` to enforce -// correct formatting for inputs even if they are already in the desired encoding. -// -// If the input is prefixed by a Byte Order Mark needed to determine encoding -// (e.g. if the encoding is UTF-16 and the BOM indicates big-endian), then that -// BOM will be consumed and not emitted into the output. If the input encoding -// is marked with an explicit endianness (e.g. UTF-16-BE), then the BOM is -// interpreted as a non-breaking-space and is preserved in the output (including -// always for UTF-8). +// DecodeCSVSelectCols sets the optional select_cols attribute to value. +// If not specified, defaults to <> +func DecodeCSVSelectCols(value []int64) DecodeCSVAttr { + return func(m optionalAttr) { + m["select_cols"] = value + } +} + +// Convert CSV records to tensors. Each column maps to one tensor. // -// The end result is that if the input is marked as an explicit endianness the -// transcoding is faithful to all codepoints in the source. If it is not marked -// with an explicit endianness, the BOM is not considered part of the string itself -// but as metadata, and so is not preserved in the output. +// RFC 4180 format is expected for the CSV records. +// (https://tools.ietf.org/html/rfc4180) +// Note that we allow leading and trailing spaces with int or float field. 
// // Arguments: -// input: The text to be processed. Can have any shape. -// input_encoding: Text encoding of the input strings. This is any of the encodings supported -// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. -// output_encoding: The unicode encoding to use in the output. Must be one of -// `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. Multi-byte encodings will be big-endian. +// records: Each string is a record/row in the csv and all records should have +// the same format. +// record_defaults: One tensor per column of the input record, with either a +// scalar default value for that column or an empty vector if the column is +// required. // -// Returns A string tensor containing unicode text encoded using `output_encoding`. -func UnicodeTranscode(scope *Scope, input tf.Output, input_encoding string, output_encoding string, optional ...UnicodeTranscodeAttr) (output tf.Output) { +// Returns Each tensor will have the same shape as records. +func DecodeCSV(scope *Scope, records tf.Output, record_defaults []tf.Output, optional ...DecodeCSVAttr) (output []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"input_encoding": input_encoding, "output_encoding": output_encoding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "UnicodeTranscode", + Type: "DecodeCSV", Input: []tf.Input{ - input, + records, tf.OutputList(record_defaults), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("DecodeCSV", err) + return + } + return output } -// Computes inverse hyperbolic sine of x element-wise. -func Asinh(scope *Scope, x tf.Output) (y tf.Output) { +// Produces the max pool of the input tensor for quantized types. 
+// +// Arguments: +// input: The 4D (batch x rows x cols x depth) Tensor to MaxReduce over. +// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// ksize: The size of the window for each dimension of the input tensor. +// The length must be 4 to match the number of dimensions of the input. +// strides: The stride of the sliding window for each dimension of the input +// tensor. The length must be 4 to match the number of dimensions of the input. +// padding: The type of padding algorithm to use. +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedMaxPool(scope *Scope, input tf.Output, min_input tf.Output, max_input tf.Output, ksize []int64, strides []int64, padding string) (output tf.Output, min_output tf.Output, max_output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "Asinh", + Type: "QuantizedMaxPool", Input: []tf.Input{ - x, + input, min_input, max_input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Creates a dataset with a range of values. Corresponds to python's xrange. +// RandomShuffleAttr is an optional argument to RandomShuffle. +type RandomShuffleAttr func(optionalAttr) + +// RandomShuffleSeed sets the optional seed attribute to value. // -// Arguments: -// start: corresponds to start in python's xrange(). -// stop: corresponds to stop in python's xrange(). -// step: corresponds to step in python's xrange(). +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. 
+// If not specified, defaults to 0 +func RandomShuffleSeed(value int64) RandomShuffleAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomShuffleSeed2 sets the optional seed2 attribute to value. // +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomShuffleSeed2(value int64) RandomShuffleAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Randomly shuffles a tensor along its first dimension. // -func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// The tensor is shuffled along dimension 0, such that each `value[j]` is mapped +// to one and only one `output[i]`. For example, a mapping that might occur for a +// 3x2 tensor is: +// +// ``` +// [[1, 2], [[5, 6], +// [3, 4], ==> [1, 2], +// [5, 6]] [3, 4]] +// ``` +// +// Arguments: +// value: The tensor to be shuffled. +// +// Returns A tensor of same shape and type as `value`, shuffled along its first +// dimension. +func RandomShuffle(scope *Scope, value tf.Output, optional ...RandomShuffleAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "RangeDataset", + Type: "RandomShuffle", Input: []tf.Input{ - start, stop, step, + value, }, Attrs: attrs, } @@ -24575,102 +24458,162 @@ func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, return op.Output(0) } -// Stops gradient computation. +// EnqueueTPUEmbeddingSparseBatchAttr is an optional argument to EnqueueTPUEmbeddingSparseBatch. +type EnqueueTPUEmbeddingSparseBatchAttr func(optionalAttr) + +// EnqueueTPUEmbeddingSparseBatchDeviceOrdinal sets the optional device_ordinal attribute to value. 
// -// When executed in a graph, this op outputs its input tensor as-is. +// value: The TPU device to use. Should be >= 0 and less than the number +// of TPU cores in the task on which the node is placed. +// If not specified, defaults to -1 +func EnqueueTPUEmbeddingSparseBatchDeviceOrdinal(value int64) EnqueueTPUEmbeddingSparseBatchAttr { + return func(m optionalAttr) { + m["device_ordinal"] = value + } +} + +// EnqueueTPUEmbeddingSparseBatchCombiners sets the optional combiners attribute to value. // -// When building ops to compute gradients, this op prevents the contribution of -// its inputs to be taken into account. Normally, the gradient generator adds ops -// to a graph to compute the derivatives of a specified 'loss' by recursively -// finding out inputs that contributed to its computation. If you insert this op -// in the graph it inputs are masked from the gradient generator. They are not -// taken into account for computing gradients. +// value: A list of string scalars, one for each embedding table that specify +// how to normalize the embedding activations after weighted summation. +// Supported combiners are 'mean', 'sum', or 'sqrtn'. It is invalid to have +// the sum of the weights be 0 for 'mean' or the sum of the squared weights be +// 0 for 'sqrtn'. If combiners isn't passed, the default is to use 'sum' for +// all tables. +// If not specified, defaults to <> +func EnqueueTPUEmbeddingSparseBatchCombiners(value []string) EnqueueTPUEmbeddingSparseBatchAttr { + return func(m optionalAttr) { + m["combiners"] = value + } +} + +// An op that enqueues TPUEmbedding input indices from a SparseTensor. // -// This is useful any time you want to compute a value with TensorFlow but need -// to pretend that the value was a constant. 
Some examples include: +// This Op eases the porting of code that uses embedding_lookup_sparse(), +// although some Python preprocessing of the SparseTensor arguments to +// embedding_lookup_sparse() is required to produce the arguments to this Op, +// since only a single EnqueueTPUEmbeddingSparseBatch Op is allowed per training +// step. // -// * The *EM* algorithm where the *M-step* should not involve backpropagation -// through the output of the *E-step*. -// * Contrastive divergence training of Boltzmann machines where, when -// differentiating the energy function, the training must not backpropagate -// through the graph that generated the samples from the model. -// * Adversarial training, where no backprop should happen through the adversarial -// example generation process. -func StopGradient(scope *Scope, input tf.Output) (output tf.Output) { +// The tensors at corresponding positions in the three input lists +// must have the same shape, i.e. rank 1 with dim_size() equal to the total +// number of lookups into the table described by the corresponding table_id. +// +// Arguments: +// sample_indices: A list of rank 1 Tensors specifying the training example and +// feature to which the corresponding embedding_indices and aggregation_weights +// values belong. sample_indices[i] must equal b * nf + f, where nf is the +// number of features from the corresponding table, f is in [0, nf), and +// b is in [0, batch size). +// embedding_indices: A list of rank 1 Tensors, indices into the embedding tables. +// aggregation_weights: A list of rank 1 Tensors containing per sample -- i.e. per +// (training example, feature) -- aggregation weights. +// mode_override: A string input that overrides the mode specified in the +// TPUEmbeddingConfiguration. Supported values are {'unspecified', 'inference', +// 'training', 'backward_pass_only'}. When set to 'unspecified', the mode set +// in TPUEmbeddingConfiguration is used, otherwise mode_override is used. 
+// +// Returns the created operation. +func EnqueueTPUEmbeddingSparseBatch(scope *Scope, sample_indices []tf.Output, embedding_indices []tf.Output, aggregation_weights []tf.Output, mode_override tf.Output, optional ...EnqueueTPUEmbeddingSparseBatchAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "StopGradient", + Type: "EnqueueTPUEmbeddingSparseBatch", Input: []tf.Input{ - input, + tf.OutputList(sample_indices), tf.OutputList(embedding_indices), tf.OutputList(aggregation_weights), mode_override, }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Eagerly executes a python function to compute func(input)->output. The +// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal. +type StatelessRandomNormalAttr func(optionalAttr) + +// StatelessRandomNormalDtype sets the optional dtype attribute to value. // -// semantics of the input, output, and attributes are the same as those for -// PyFunc. -func EagerPyFunc(scope *Scope, input []tf.Output, token string, Tout []tf.DataType) (output []tf.Output) { +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs deterministic pseudorandom values from a normal distribution. +// +// The generated values will have mean 0 and standard deviation 1. +// +// The outputs are a deterministic function of `shape` and `seed`. +// +// Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// +// Returns Random values with specified shape. 
+func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"token": token, "Tout": Tout} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "EagerPyFunc", + Type: "StatelessRandomNormal", Input: []tf.Input{ - tf.OutputList(input), + shape, seed, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("EagerPyFunc", err) - return - } - return output + return op.Output(0) } -// Says whether the targets are in the top `K` predictions. +// An Op to exchange data across TPU replicas. // -// This outputs a `batch_size` bool array, an entry `out[i]` is `true` if the -// prediction for the target class is among the top `k` predictions among -// all predictions for example `i`. Note that the behavior of `InTopK` differs -// from the `TopK` op in its handling of ties; if multiple classes have the -// same prediction value and straddle the top-`k` boundary, all of those -// classes are considered to be in the top `k`. +// On each replica, the input is split into `split_count` blocks along +// `split_dimension` and send to the other replicas given group_assignment. After +// receiving `split_count` - 1 blocks from other replicas, we concatenate the +// blocks along `concat_dimension` as the output. 
// -// More formally, let +// For example, suppose there are 2 TPU replicas: +// replica 0 receives input: `[[A, B]]` +// replica 1 receives input: `[[C, D]]` // -// \\(predictions_i\\) be the predictions for all classes for example `i`, -// \\(targets_i\\) be the target class for example `i`, -// \\(out_i\\) be the output for example `i`, +// group_assignment=`[[0, 1]]` +// concat_dimension=0 +// split_dimension=1 +// split_count=2 // -// $$out_i = predictions_{i, targets_i} \in TopKIncludingTies(predictions_i)$$ +// replica 0's output: `[[A], [C]]` +// replica 1's output: `[[B], [D]]` // // Arguments: -// predictions: A `batch_size` x `classes` tensor. -// targets: A `batch_size` vector of class ids. -// k: Number of top elements to look at for computing precision. +// input: The local input to the sum. +// group_assignment: An int32 tensor with shape +// [num_groups, num_replicas_per_group]. `group_assignment[i]` represents the +// replica ids in the ith subgroup. +// concat_dimension: The dimension number to concatenate. +// split_dimension: The dimension number to split. +// split_count: The number of splits, this number must equal to the sub-group +// size(group_assignment.get_shape()[1]) // -// Returns Computed Precision at `k` as a `bool Tensor`. -func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (precision tf.Output) { +// Returns The exchanged result. 
+func AllToAll(scope *Scope, input tf.Output, group_assignment tf.Output, concat_dimension int64, split_dimension int64, split_count int64) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"k": k} + attrs := map[string]interface{}{"concat_dimension": concat_dimension, "split_dimension": split_dimension, "split_count": split_count} opspec := tf.OpSpec{ - Type: "InTopK", + Type: "AllToAll", Input: []tf.Input{ - predictions, targets, + input, group_assignment, }, Attrs: attrs, } @@ -24678,283 +24621,342 @@ func InTopK(scope *Scope, predictions tf.Output, targets tf.Output, k int64) (pr return op.Output(0) } -// Returns (x - y)(x - y) element-wise. +// Adds a value to the current value of a variable. // -// *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func SquaredDifference(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Any ReadVariableOp with a control dependency on this op is guaranteed to +// see the incremented value or a subsequent newer one. +// +// Arguments: +// resource: handle to the resource in which to store the variable. +// value: the value by which the variable will be incremented. +// +// Returns the created operation. +func AssignAddVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SquaredDifference", + Type: "AssignAddVariableOp", Input: []tf.Input{ - x, y, + resource, value, + }, + } + return scope.AddOperation(opspec) +} + +// Real-valued fast Fourier transform. +// +// Computes the 1-dimensional discrete Fourier transform of a real-valued signal +// over the inner-most dimension of `input`. 
+// +// Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the +// `fft_length / 2 + 1` unique components of the FFT: the zero-frequency term, +// followed by the `fft_length / 2` positive-frequency terms. +// +// Along the axis `RFFT` is computed on, if `fft_length` is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. +// +// Arguments: +// input: A float32 tensor. +// fft_length: An int32 tensor of shape [1]. The FFT length. +// +// Returns A complex64 tensor of the same rank as `input`. The inner-most +// dimension of `input` is replaced with the `fft_length / 2 + 1` unique +// frequency components of its 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.rfft +// @end_compatibility +func RFFT(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "RFFT", + Input: []tf.Input{ + input, fft_length, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// RandomGammaAttr is an optional argument to RandomGamma. -type RandomGammaAttr func(optionalAttr) +// RetrieveTPUEmbeddingAdadeltaParametersAttr is an optional argument to RetrieveTPUEmbeddingAdadeltaParameters. +type RetrieveTPUEmbeddingAdadeltaParametersAttr func(optionalAttr) -// RandomGammaSeed sets the optional seed attribute to value. +// RetrieveTPUEmbeddingAdadeltaParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. 
-// If not specified, defaults to 0 -func RandomGammaSeed(value int64) RandomGammaAttr { +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingAdadeltaParametersTableId(value int64) RetrieveTPUEmbeddingAdadeltaParametersAttr { return func(m optionalAttr) { - m["seed"] = value + m["table_id"] = value } } -// RandomGammaSeed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomGammaSeed2(value int64) RandomGammaAttr { +// RetrieveTPUEmbeddingAdadeltaParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingAdadeltaParametersTableName(value string) RetrieveTPUEmbeddingAdadeltaParametersAttr { return func(m optionalAttr) { - m["seed2"] = value + m["table_name"] = value } } -// Outputs random values from the Gamma distribution(s) described by alpha. +// Retrieve Adadelta embedding parameters. // -// This op uses the algorithm by Marsaglia et al. to acquire samples via -// transformation-rejection from pairs of uniform and normal random variables. -// See http://dl.acm.org/citation.cfm?id=358414 -// -// Arguments: -// shape: 1-D integer tensor. Shape of independent samples to draw from each -// distribution described by the shape parameters given in alpha. -// alpha: A tensor in which each scalar is a "shape" parameter describing the -// associated gamma distribution. +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // -// Returns A tensor with shape `shape + shape(alpha)`. Each slice -// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for -// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. 
-func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { +// Returns Parameter parameters updated by the Adadelta optimization algorithm.Parameter accumulators updated by the Adadelta optimization algorithm.Parameter updates updated by the Adadelta optimization algorithm. +func RetrieveTPUEmbeddingAdadeltaParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingAdadeltaParametersAttr) (parameters tf.Output, accumulators tf.Output, updates tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "RandomGamma", - Input: []tf.Input{ - shape, alpha, - }, + Type: "RetrieveTPUEmbeddingAdadeltaParameters", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Convert the quantized 'input' tensor into a lower-precision 'output', using the +// UpperBoundAttr is an optional argument to UpperBound. +type UpperBoundAttr func(optionalAttr) + +// UpperBoundOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_INT32 +func UpperBoundOutType(value tf.DataType) UpperBoundAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Applies upper_bound(sorted_search_values, values) along each row. // -// actual distribution of the values to maximize the usage of the lower bit depth -// and adjusting the output min and max ranges accordingly. +// Each set of rows with the same index in (sorted_inputs, values) is treated +// independently. The resulting row is the equivalent of calling +// `np.searchsorted(sorted_inputs, values, side='right')`. // -// [input_min, input_max] are scalar floats that specify the range for the float -// interpretation of the 'input' data. 
For example, if input_min is -1.0f and -// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 -// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. +// The result is not a global index to the entire +// `Tensor`, but rather just the index in the last dimension. // -// This operator tries to squeeze as much precision as possible into an output with -// a lower bit depth by calculating the actual min and max values found in the -// data. For example, maybe that quint16 input has no values lower than 16,384 and -// none higher than 49,152. That means only half the range is actually needed, all -// the float interpretations are between -0.5f and 0.5f, so if we want to compress -// the data into a quint8 output, we can use that range rather than the theoretical -// -1.0f to 1.0f that is suggested by the input min and max. +// A 2-D example: +// sorted_sequence = [[0, 3, 9, 9, 10], +// [1, 2, 3, 4, 5]] +// values = [[2, 4, 9], +// [0, 2, 6]] // -// In practice, this is most useful for taking output from operations like -// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and -// may have large potential output ranges, but in practice have a distribution of -// input values that only uses a small fraction of the possible range. By feeding -// that output into this operator, we can reduce it from 32 bits down to 8 with -// minimal loss of accuracy. +// result = UpperBound(sorted_sequence, values) // -// Arguments: +// result == [[1, 2, 4], +// [0, 2, 5]] // -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// out_type: The type of the output. Should be a lower bit depth than Tinput. +// Arguments: +// sorted_inputs: 2-D Tensor where each row is ordered. +// values: 2-D Tensor with the same numbers of rows as `sorted_search_values`. 
Contains +// the values that will be searched for in `sorted_search_values`. // -// Returns The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. -func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// Returns A `Tensor` with the same shape as `values`. It contains the last scalar index +// into the last dimension where values can be inserted without changing the +// ordered property. +func UpperBound(scope *Scope, sorted_inputs tf.Output, values tf.Output, optional ...UpperBoundAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "QuantizeDownAndShrinkRange", + Type: "UpperBound", Input: []tf.Input{ - input, input_min, input_max, + sorted_inputs, values, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Returns element-wise remainder of division. This emulates C semantics in that +// FractionalMaxPoolGradAttr is an optional argument to FractionalMaxPoolGrad. +type FractionalMaxPoolGradAttr func(optionalAttr) + +// FractionalMaxPoolGradOverlapping sets the optional overlapping attribute to value. // -// the result here is consistent with a truncating divide. E.g. `truncate(x / y) * -// y + truncate_mod(x, y) = x`. +// value: When set to True, it means when pooling, the values at the boundary +// of adjacent pooling cells are used by both cells. For example: // -// *NOTE*: `TruncateMod` supports broadcasting. 
More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func TruncateMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TruncateMod", - Input: []tf.Input{ - x, y, - }, +// `index 0 1 2 3 4` +// +// `value 20 5 16 3 7` +// +// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. +// The result would be [20, 16] for fractional max pooling. +// If not specified, defaults to false +func FractionalMaxPoolGradOverlapping(value bool) FractionalMaxPoolGradAttr { + return func(m optionalAttr) { + m["overlapping"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes offsets of concat inputs within its output. -// -// For example: -// -// ``` -// # 'x' is [2, 2, 7] -// # 'y' is [2, 3, 7] -// # 'z' is [2, 5, 7] -// concat_offset(2, [x, y, z]) => [0, 0, 0], [0, 2, 0], [0, 5, 0] -// ``` -// -// This is typically used by gradient computations for a concat operation. +// Computes gradient of the FractionalMaxPool function. // // Arguments: -// concat_dim: The dimension along which to concatenate. -// shape: The `N` int32 vectors representing shape of tensors being concatenated. +// orig_input: Original input for `fractional_max_pool` +// orig_output: Original output for `fractional_max_pool` +// out_backprop: 4-D with shape `[batch, height, width, channels]`. Gradients +// w.r.t. the output of `fractional_max_pool`. +// row_pooling_sequence: row pooling sequence, form pooling region with +// col_pooling_sequence. +// col_pooling_sequence: column pooling sequence, form pooling region with +// row_pooling sequence. // -// Returns The `N` int32 vectors representing the starting offset -// of input tensors within the concatenated output. -func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset []tf.Output) { +// Returns 4-D. Gradients w.r.t. the input of `fractional_max_pool`. 
+func FractionalMaxPoolGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, out_backprop tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output, optional ...FractionalMaxPoolGradAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ConcatOffset", + Type: "FractionalMaxPoolGrad", Input: []tf.Input{ - concat_dim, tf.OutputList(shape), + orig_input, orig_output, out_backprop, row_pooling_sequence, col_pooling_sequence, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if offset, idx, err = makeOutputList(op, idx, "offset"); err != nil { - scope.UpdateErr("ConcatOffset", err) - return - } - return offset + return op.Output(0) } -// Compute the lower regularized incomplete Gamma function `P(a, x)`. -// -// The lower regularized incomplete Gamma function is defined as: -// +// SparseReduceMaxSparseAttr is an optional argument to SparseReduceMaxSparse. +type SparseReduceMaxSparseAttr func(optionalAttr) + +// SparseReduceMaxSparseKeepDims sets the optional keep_dims attribute to value. // -// \\(P(a, x) = gamma(a, x) / Gamma(a) = 1 - Q(a, x)\\) +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func SparseReduceMaxSparseKeepDims(value bool) SparseReduceMaxSparseAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the max of elements across dimensions of a SparseTensor. // -// where +// This Op takes a SparseTensor and is the sparse counterpart to +// `tf.reduce_max()`. In contrast to SparseReduceMax, this Op returns a +// SparseTensor. // -// \\(gamma(a, x) = \\int_{0}^{x} t^{a-1} exp(-t) dt\\) +// Reduces `sp_input` along the dimensions given in `reduction_axes`. 
Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `reduction_axes`. If `keep_dims` is true, the reduced dimensions are retained +// with length 1. // -// is the lower incomplete Gamma function. +// If `reduction_axes` has no entries, all dimensions are reduced, and a tensor +// with a single element is returned. Additionally, the axes can be negative, +// which are interpreted according to the indexing rules in Python. // -// Note, above `Q(a, x)` (`Igammac`) is the upper regularized complete -// Gamma function. -func Igamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { +// Arguments: +// input_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// input_values: 1-D. `N` non-empty values corresponding to `input_indices`. +// input_shape: 1-D. Shape of the input SparseTensor. +// reduction_axes: 1-D. Length-`K` vector containing the reduction axes. +func SparseReduceMaxSparse(scope *Scope, input_indices tf.Output, input_values tf.Output, input_shape tf.Output, reduction_axes tf.Output, optional ...SparseReduceMaxSparseAttr) (output_indices tf.Output, output_values tf.Output, output_shape tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Igamma", + Type: "SparseReduceMaxSparse", Input: []tf.Input{ - a, x, + input_indices, input_values, input_shape, reduction_axes, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Compute the Hurwitz zeta function \\(\zeta(x, q)\\). +// Convert one or more images from HSV to RGB. // -// The Hurwitz zeta function is defined as: +// Outputs a tensor of the same shape as the `images` tensor, containing the RGB +// value of the pixels. The output is only well defined if the value in `images` +// are in `[0,1]`. 
// +// See `rgb_to_hsv` for a description of the HSV encoding. // -// \\(\zeta(x, q) = \sum_{n=0}^{\infty} (q + n)^{-x}\\) -func Zeta(scope *Scope, x tf.Output, q tf.Output) (z tf.Output) { +// Arguments: +// images: 1-D or higher rank. HSV data to convert. Last dimension must be size 3. +// +// Returns `images` converted to RGB. +func HSVToRGB(scope *Scope, images tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Zeta", + Type: "HSVToRGB", Input: []tf.Input{ - x, q, + images, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the cardinality of `input_dataset`. -// -// Returns the cardinality of `input_dataset`. -// -// Arguments: -// input_dataset: A variant tensor representing the dataset to return cardinality for. +// Computes the gradient of the sigmoid of `x` wrt its input. // -// Returns The cardinality of `input_dataset`. Named constants are used to represent -// infinite and unknown cardinality. -func ExperimentalDatasetCardinality(scope *Scope, input_dataset tf.Output) (cardinality tf.Output) { +// Specifically, `grad = dy * y * (1 - y)`, where `y = sigmoid(x)`, and +// `dy` is the corresponding input gradient. +func SigmoidGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ExperimentalDatasetCardinality", + Type: "SigmoidGrad", Input: []tf.Input{ - input_dataset, + y, dy, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that executes a SQL query and emits rows of the result set. +// Creates a dataset that changes the batch size. +// +// Creates a dataset that changes the batch size of the dataset to current batch +// size // num_workers. // // Arguments: -// driver_name: The database type. Currently, the only supported type is 'sqlite'. -// data_source_name: A connection string to connect to the database. -// query: A SQL query to execute. 
+// input_dataset: A variant tensor representing the input dataset. +// num_workers: A scalar representing the number of workers to distribute this batch across. As +// a result of this transformation the current batch size would end up being +// divided by this parameter. // // -func ExperimentalSqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +func ExperimentalRebatchDataset(scope *Scope, input_dataset tf.Output, num_workers tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ExperimentalSqlDataset", + Type: "ExperimentalRebatchDataset", Input: []tf.Input{ - driver_name, data_source_name, query, + input_dataset, num_workers, }, Attrs: attrs, } @@ -24962,57 +24964,58 @@ func ExperimentalSqlDataset(scope *Scope, driver_name tf.Output, data_source_nam return op.Output(0) } -// Compute the regularized incomplete beta integral \\(I_x(a, b)\\). -// -// The regularized incomplete beta integral is defined as: -// -// -// \\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\) -// -// where +// Creates a dataset that emits the outputs of `input_dataset` `count` times. // +// Arguments: // -// \\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\) +// count: A scalar representing the number of times that `input_dataset` should +// be repeated. A value of `-1` indicates that it should be repeated infinitely. // // -// is the incomplete beta function and \\(B(a, b)\\) is the *complete* -// beta function. 
-func Betainc(scope *Scope, a tf.Output, b tf.Output, x tf.Output) (z tf.Output) { +func RepeatDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "Betainc", + Type: "RepeatDataset", Input: []tf.Input{ - a, b, x, + input_dataset, count, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ShapeAttr is an optional argument to Shape. -type ShapeAttr func(optionalAttr) +// ResourceApplyAdagradDAAttr is an optional argument to ResourceApplyAdagradDA. +type ResourceApplyAdagradDAAttr func(optionalAttr) -// ShapeOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_INT32 -func ShapeOutType(value tf.DataType) ShapeAttr { +// ResourceApplyAdagradDAUseLocking sets the optional use_locking attribute to value. +// +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceApplyAdagradDAUseLocking(value bool) ResourceApplyAdagradDAAttr { return func(m optionalAttr) { - m["out_type"] = value + m["use_locking"] = value } } -// Returns the shape of a tensor. -// -// This operation returns a 1-D integer tensor representing the shape of `input`. +// Update '*var' according to the proximal adagrad scheme. // -// For example: +// Arguments: +// var_: Should be from a Variable(). +// gradient_accumulator: Should be from a Variable(). +// gradient_squared_accumulator: Should be from a Variable(). +// grad: The gradient. +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// global_step: Training step number. Must be a scalar. 
// -// ``` -// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] -// shape(t) ==> [2, 2, 3] -// ``` -func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) { +// Returns the created operation. +func ResourceApplyAdagradDA(scope *Scope, var_ tf.Output, gradient_accumulator tf.Output, gradient_squared_accumulator tf.Output, grad tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, global_step tf.Output, optional ...ResourceApplyAdagradDAAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -25021,82 +25024,74 @@ func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Outp a(attrs) } opspec := tf.OpSpec{ - Type: "Shape", + Type: "ResourceApplyAdagradDA", Input: []tf.Input{ - input, + var_, gradient_accumulator, gradient_squared_accumulator, grad, lr, l1, l2, global_step, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes fingerprints of the input strings. +// Creates a TensorList which, when stacked, has the value of `tensor`. // -// Arguments: -// input: vector of strings to compute fingerprints on. +// Each tensor in the result list corresponds to one row of the input tensor. // -// Returns a (N,2) shaped matrix where N is the number of elements in the input -// vector. Each row contains the low and high parts of the fingerprint. -func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) { +// tensor: The input tensor. +// output_handle: The list. +func TensorListFromTensor(scope *Scope, tensor tf.Output, element_shape tf.Output) (output_handle tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SdcaFprint", + Type: "TensorListFromTensor", Input: []tf.Input{ - input, + tensor, element_shape, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the power of one value to another. 
-// -// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for -// corresponding elements in `x` and `y`. For example: +// ConfigureDistributedTPUAttr is an optional argument to ConfigureDistributedTPU. +type ConfigureDistributedTPUAttr func(optionalAttr) + +// ConfigureDistributedTPUEmbeddingConfig sets the optional embedding_config attribute to value. // -// ``` -// # tensor 'x' is [[2, 2]], [3, 3]] -// # tensor 'y' is [[8, 16], [2, 3]] -// tf.pow(x, y) ==> [[256, 65536], [9, 27]] -// ``` -func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Pow", - Input: []tf.Input{ - x, y, - }, +// value: Reserved. Do not use. +// If not specified, defaults to "" +func ConfigureDistributedTPUEmbeddingConfig(value string) ConfigureDistributedTPUAttr { + return func(m optionalAttr) { + m["embedding_config"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// QuantizedReluXAttr is an optional argument to QuantizedReluX. -type QuantizedReluXAttr func(optionalAttr) - -// QuantizedReluXOutType sets the optional out_type attribute to value. -// If not specified, defaults to DT_QUINT8 -func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr { +// ConfigureDistributedTPUTpuEmbeddingConfig sets the optional tpu_embedding_config attribute to value. +// +// value: Serialized tensorflow.tpu.TPUEmbeddingConfiguration that +// describes the embedding lookups of the program. +// If not specified, defaults to "" +func ConfigureDistributedTPUTpuEmbeddingConfig(value string) ConfigureDistributedTPUAttr { return func(m optionalAttr) { - m["out_type"] = value + m["tpu_embedding_config"] = value } } -// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` -// -// Arguments: +// ConfigureDistributedTPUIsGlobalInit sets the optional is_global_init attribute to value. // +// value: Reserved. Do not use. 
+// If not specified, defaults to false +func ConfigureDistributedTPUIsGlobalInit(value bool) ConfigureDistributedTPUAttr { + return func(m optionalAttr) { + m["is_global_init"] = value + } +} + +// Sets up the centralized structures for a distributed TPU system. // -// min_features: The float value that the lowest quantized value represents. -// max_features: The float value that the highest quantized value represents. -// -// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. -func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { +// Returns A serialized tensorflow.tpu.TopologyProto that describes the TPU +// topology. +func ConfigureDistributedTPU(scope *Scope, optional ...ConfigureDistributedTPUAttr) (topology tf.Output) { if scope.Err() != nil { return } @@ -25105,106 +25100,149 @@ func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_f a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedReluX", - Input: []tf.Input{ - features, max_value, min_features, max_features, - }, + Type: "ConfigureDistributedTPU", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Returns the truth value of (x < y) element-wise. +// Reshapes a quantized tensor as per the Reshape op. // -// *NOTE*: `Less` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// ``` +// +// Arguments: +// +// shape: Defines the shape of the output tensor. +// input_min: The minimum value of the input. +// input_max: The maximum value of the input. 
+// +// Returns This value is copied from input_min.This value is copied from input_max. +func QuantizedReshape(scope *Scope, tensor tf.Output, shape tf.Output, input_min tf.Output, input_max tf.Output) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Less", + Type: "QuantizedReshape", Input: []tf.Input{ - x, y, + tensor, shape, input_min, input_max, }, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// RandomPoissonAttr is an optional argument to RandomPoisson. -type RandomPoissonAttr func(optionalAttr) +// PriorityQueueV2Attr is an optional argument to PriorityQueueV2. +type PriorityQueueV2Attr func(optionalAttr) -// RandomPoissonSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func RandomPoissonSeed(value int64) RandomPoissonAttr { +// PriorityQueueV2ComponentTypes sets the optional component_types attribute to value. +// +// value: The type of each component in a value. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func PriorityQueueV2ComponentTypes(value []tf.DataType) PriorityQueueV2Attr { return func(m optionalAttr) { - m["seed"] = value + m["component_types"] = value } } -// RandomPoissonSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func RandomPoissonSeed2(value int64) RandomPoissonAttr { +// PriorityQueueV2Capacity sets the optional capacity attribute to value. +// +// value: The upper bound on the number of elements in this queue. +// Negative numbers mean no limit. +// If not specified, defaults to -1 +func PriorityQueueV2Capacity(value int64) PriorityQueueV2Attr { return func(m optionalAttr) { - m["seed2"] = value + m["capacity"] = value } } -// Use RandomPoissonV2 instead. +// PriorityQueueV2Container sets the optional container attribute to value. 
// -// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2 -func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) +// value: If non-empty, this queue is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func PriorityQueueV2Container(value string) PriorityQueueV2Attr { + return func(m optionalAttr) { + m["container"] = value } - opspec := tf.OpSpec{ - Type: "RandomPoisson", - Input: []tf.Input{ - shape, rate, - }, - Attrs: attrs, +} + +// PriorityQueueV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this queue will be shared under the given name +// across multiple sessions. +// If not specified, defaults to "" +func PriorityQueueV2SharedName(value string) PriorityQueueV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns the truth value of (x >= y) element-wise. +// A queue that produces elements sorted by the first component value. // -// *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Note that the PriorityQueue requires the first component of any element +// to be a scalar int64, in addition to the other elements declared by +// component_types. Therefore calls to Enqueue and EnqueueMany (resp. Dequeue +// and DequeueMany) on a PriorityQueue will all require (resp. output) one extra +// entry in their input (resp. output) lists. +// +// Arguments: +// shapes: The shape of each component in a value. The length of this attr must +// be either 0 or the same as the length of component_types. 
If the length of +// this attr is 0, the shapes of queue elements are not constrained, and +// only one element may be dequeued at a time. +// +// Returns The handle to the queue. +func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV2Attr) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"shapes": shapes} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "GreaterEqual", - Input: []tf.Input{ - x, y, - }, + Type: "PriorityQueueV2", + + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ApproximateEqualAttr is an optional argument to ApproximateEqual. -type ApproximateEqualAttr func(optionalAttr) +// ResourceSparseApplyProximalGradientDescentAttr is an optional argument to ResourceSparseApplyProximalGradientDescent. +type ResourceSparseApplyProximalGradientDescentAttr func(optionalAttr) -// ApproximateEqualTolerance sets the optional tolerance attribute to value. -// If not specified, defaults to 1e-05 -func ApproximateEqualTolerance(value float32) ApproximateEqualAttr { +// ResourceSparseApplyProximalGradientDescentUseLocking sets the optional use_locking attribute to value. +// +// value: If True, the subtraction will be protected by a lock; +// otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceSparseApplyProximalGradientDescentUseLocking(value bool) ResourceSparseApplyProximalGradientDescentAttr { return func(m optionalAttr) { - m["tolerance"] = value + m["use_locking"] = value } } -// Returns the truth value of abs(x-y) < tolerance element-wise. -func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...ApproximateEqualAttr) (z tf.Output) { +// Sparse update '*var' as FOBOS algorithm with fixed learning rate. 
+// +// That is for rows we have grad for, we update var as follows: +// prox_v = var - alpha * grad +// var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} +// +// Arguments: +// var_: Should be from a Variable(). +// alpha: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// grad: The gradient. +// indices: A vector of indices into the first dimension of var and accum. +// +// Returns the created operation. +func ResourceSparseApplyProximalGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, indices tf.Output, optional ...ResourceSparseApplyProximalGradientDescentAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -25213,215 +25251,141 @@ func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...Approx a(attrs) } opspec := tf.OpSpec{ - Type: "ApproximateEqual", + Type: "ResourceSparseApplyProximalGradientDescent", Input: []tf.Input{ - x, y, + var_, alpha, l1, l2, grad, indices, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the truth value of x OR y element-wise. -// -// *NOTE*: `LogicalOr` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func LogicalOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "LogicalOr", - Input: []tf.Input{ - x, y, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Selects elements from `x` or `y`, depending on `condition`. -// -// The `x`, and `y` tensors must all have the same shape, and the -// output will also have that shape. -// -// The `condition` tensor must be a scalar if `x` and `y` are scalars. 
-// If `x` and `y` are vectors or higher rank, then `condition` must be either a -// scalar, a vector with size matching the first dimension of `x`, or must have -// the same shape as `x`. -// -// The `condition` tensor acts as a mask that chooses, based on the value at each -// element, whether the corresponding element / row in the output should be -// taken from `x` (if true) or `y` (if false). -// -// If `condition` is a vector and `x` and `y` are higher rank matrices, then -// it chooses which row (outer dimension) to copy from `x` and `y`. -// If `condition` has the same shape as `x` and `y`, then it chooses which -// element to copy from `x` and `y`. -// -// For example: -// -// ```python -// # 'condition' tensor is [[True, False] -// # [False, True]] -// # 't' is [[1, 2], -// # [3, 4]] -// # 'e' is [[5, 6], -// # [7, 8]] -// select(condition, t, e) # => [[1, 6], [7, 4]] -// +// Check if the input matches the regex pattern. // -// # 'condition' tensor is [True, False] -// # 't' is [[1, 2], -// # [3, 4]] -// # 'e' is [[5, 6], -// # [7, 8]] -// select(condition, t, e) ==> [[1, 2], -// [7, 8]] +// The input is a string tensor of any shape. The pattern is the +// regular expression to be matched with every element of the input tensor. +// The boolean values (True or False) of the output tensor indicate +// if the input matches the regex pattern provided. // -// ``` +// The pattern follows the re2 syntax (https://github.com/google/re2/wiki/Syntax) // // Arguments: +// input: A string tensor of the text to be processed. +// pattern: The regular expression to match the input. // -// x: = A `Tensor` which may have the same shape as `condition`. -// If `condition` is rank 1, `x` may have higher rank, -// but its first dimension must match the size of `condition`. -// y: = A `Tensor` with the same type and shape as `x`. -// -// Returns = A `Tensor` with the same type and shape as `x` and `y`. 
-func Select(scope *Scope, condition tf.Output, x tf.Output, y tf.Output) (output tf.Output) { +// Returns A bool tensor with the same shape as `input`. +func StaticRegexFullMatch(scope *Scope, input tf.Output, pattern string) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"pattern": pattern} opspec := tf.OpSpec{ - Type: "Select", + Type: "StaticRegexFullMatch", Input: []tf.Input{ - condition, x, y, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// MatMulAttr is an optional argument to MatMul. -type MatMulAttr func(optionalAttr) +// OutfeedDequeueAttr is an optional argument to OutfeedDequeue. +type OutfeedDequeueAttr func(optionalAttr) -// MatMulTransposeA sets the optional transpose_a attribute to value. +// OutfeedDequeueDeviceOrdinal sets the optional device_ordinal attribute to value. // -// value: If true, "a" is transposed before multiplication. -// If not specified, defaults to false -func MatMulTransposeA(value bool) MatMulAttr { +// value: The TPU device to use. This should be -1 when the Op +// is running on a TPU device, and >= 0 when the Op is running on the CPU +// device. +// If not specified, defaults to -1 +func OutfeedDequeueDeviceOrdinal(value int64) OutfeedDequeueAttr { return func(m optionalAttr) { - m["transpose_a"] = value + m["device_ordinal"] = value } } -// MatMulTransposeB sets the optional transpose_b attribute to value. +// Retrieves a single tensor from the computation outfeed. // -// value: If true, "b" is transposed before multiplication. -// If not specified, defaults to false -func MatMulTransposeB(value bool) MatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// Multiply the matrix "a" by the matrix "b". +// This operation will block indefinitely until data is available. 
// -// The inputs must be two-dimensional matrices and the inner dimension of -// "a" (after being transposed if transpose_a is true) must match the -// outer dimension of "b" (after being transposed if transposed_b is -// true). +// Arguments: +// dtype: The type of elements in the tensor. +// shape: The shape of the tensor. // -// *Note*: The default kernel implementation for MatMul on GPUs uses -// cublas. -func MatMul(scope *Scope, a tf.Output, b tf.Output, optional ...MatMulAttr) (product tf.Output) { +// Returns A tensor that will be read from the device outfeed. +func OutfeedDequeue(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...OutfeedDequeueAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype, "shape": shape} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MatMul", - Input: []tf.Input{ - a, b, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} + Type: "OutfeedDequeue", -// Serializes the tree handle to a proto -// -// Arguments: -// tree_handle: Handle to the tree resource to be serialized. -// -// Returns Serialied proto string of the tree resource. -func TensorForestTreeSerialize(scope *Scope, tree_handle tf.Output) (tree_config tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorForestTreeSerialize", - Input: []tf.Input{ - tree_handle, - }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// SparseMatMulAttr is an optional argument to SparseMatMul. -type SparseMatMulAttr func(optionalAttr) - -// SparseMatMulTransposeA sets the optional transpose_a attribute to value. -// If not specified, defaults to false -func SparseMatMulTransposeA(value bool) SparseMatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} +// RandomPoissonV2Attr is an optional argument to RandomPoissonV2. 
+type RandomPoissonV2Attr func(optionalAttr) -// SparseMatMulTransposeB sets the optional transpose_b attribute to value. -// If not specified, defaults to false -func SparseMatMulTransposeB(value bool) SparseMatMulAttr { +// RandomPoissonV2Seed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomPoissonV2Seed(value int64) RandomPoissonV2Attr { return func(m optionalAttr) { - m["transpose_b"] = value + m["seed"] = value } } -// SparseMatMulAIsSparse sets the optional a_is_sparse attribute to value. -// If not specified, defaults to false -func SparseMatMulAIsSparse(value bool) SparseMatMulAttr { +// RandomPoissonV2Seed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomPoissonV2Seed2(value int64) RandomPoissonV2Attr { return func(m optionalAttr) { - m["a_is_sparse"] = value + m["seed2"] = value } } -// SparseMatMulBIsSparse sets the optional b_is_sparse attribute to value. -// If not specified, defaults to false -func SparseMatMulBIsSparse(value bool) SparseMatMulAttr { +// RandomPoissonV2Dtype sets the optional dtype attribute to value. +// If not specified, defaults to DT_INT64 +func RandomPoissonV2Dtype(value tf.DataType) RandomPoissonV2Attr { return func(m optionalAttr) { - m["b_is_sparse"] = value + m["dtype"] = value } } -// Multiply matrix "a" by matrix "b". +// Outputs random values from the Poisson distribution(s) described by rate. // -// The inputs must be two-dimensional matrices and the inner dimension of "a" must -// match the outer dimension of "b". Both "a" and "b" must be `Tensor`s not -// `SparseTensor`s. 
This op is optimized for the case where at least one of "a" or -// "b" is sparse, in the sense that they have a large proportion of zero values. -// The breakeven for using this versus a dense matrix multiply on one platform was -// 30% zero values in the sparse matrix. +// This op uses two algorithms, depending on rate. If rate >= 10, then +// the algorithm by Hormann is used to acquire samples via +// transformation-rejection. +// See http://www.sciencedirect.com/science/article/pii/0167668793909974. // -// The gradient computation of this operation will only take advantage of sparsity -// in the input gradient when that gradient comes from a Relu. -func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatMulAttr) (product tf.Output) { +// Otherwise, Knuth's algorithm is used to acquire samples via multiplying uniform +// random variables. +// See Donald E. Knuth (1969). Seminumerical Algorithms. The Art of Computer +// Programming, Volume 2. Addison Wesley +// +// Arguments: +// shape: 1-D integer tensor. Shape of independent samples to draw from each +// distribution described by the shape parameters given in rate. +// rate: A tensor in which each scalar is a "rate" parameter describing the +// associated poisson distribution. +// +// Returns A tensor with shape `shape + shape(rate)`. Each slice +// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for +// `rate[i0, i1, ...iN]`. 
+func RandomPoissonV2(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonV2Attr) (output tf.Output) { if scope.Err() != nil { return } @@ -25430,9 +25394,9 @@ func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatM a(attrs) } opspec := tf.OpSpec{ - Type: "SparseMatMul", + Type: "RandomPoissonV2", Input: []tf.Input{ - a, b, + shape, rate, }, Attrs: attrs, } @@ -25440,146 +25404,133 @@ func SparseMatMul(scope *Scope, a tf.Output, b tf.Output, optional ...SparseMatM return op.Output(0) } -// ExperimentalThreadPoolHandleAttr is an optional argument to ExperimentalThreadPoolHandle. -type ExperimentalThreadPoolHandleAttr func(optionalAttr) +// RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug. +type RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr func(optionalAttr) -// ExperimentalThreadPoolHandleMaxIntraOpParallelism sets the optional max_intra_op_parallelism attribute to value. +// RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: The maximum degree of parallelism to use within operations that execute on this -// threadpool. -// If not specified, defaults to 1 -func ExperimentalThreadPoolHandleMaxIntraOpParallelism(value int64) ExperimentalThreadPoolHandleAttr { - return func(m optionalAttr) { - m["max_intra_op_parallelism"] = value - } -} - -// ExperimentalThreadPoolHandleContainer sets the optional container attribute to value. 
-// If not specified, defaults to "" -func ExperimentalThreadPoolHandleContainer(value string) ExperimentalThreadPoolHandleAttr { +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["container"] = value + m["table_id"] = value } } -// ExperimentalThreadPoolHandleSharedName sets the optional shared_name attribute to value. +// RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableName sets the optional table_name attribute to value. // If not specified, defaults to "" -func ExperimentalThreadPoolHandleSharedName(value string) ExperimentalThreadPoolHandleAttr { +func RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["table_name"] = value } } -// Creates a dataset that uses a custom thread pool to compute `input_dataset`. +// Retrieve RMSProp embedding parameters with debug support. // -// Arguments: -// num_threads: The number of threads in the thread pool. -// display_name: A human-readable name for the threads that may be visible in some -// visualizations. -// threadpool. +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // -// Returns A resource that can be consumed by one or more ExperimentalThreadPoolDataset -// ops. 
-func ExperimentalThreadPoolHandle(scope *Scope, num_threads int64, display_name string, optional ...ExperimentalThreadPoolHandleAttr) (handle tf.Output) { +// Returns Parameter parameters updated by the RMSProp optimization algorithm.Parameter ms updated by the RMSProp optimization algorithm.Parameter mom updated by the RMSProp optimization algorithm.Parameter gradient_accumulators updated by the RMSProp optimization algorithm. +func RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingRMSPropParametersGradAccumDebugAttr) (parameters tf.Output, ms tf.Output, mom tf.Output, gradient_accumulators tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_threads": num_threads, "display_name": display_name} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ExperimentalThreadPoolHandle", + Type: "RetrieveTPUEmbeddingRMSPropParametersGradAccumDebug", Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// CudnnRNNCanonicalToParamsAttr is an optional argument to CudnnRNNCanonicalToParams. -type CudnnRNNCanonicalToParamsAttr func(optionalAttr) - -// CudnnRNNCanonicalToParamsRnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNCanonicalToParamsRnnMode(value string) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["rnn_mode"] = value - } + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// CudnnRNNCanonicalToParamsInputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNCanonicalToParamsInputMode(value string) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["input_mode"] = value +// Computes the gradient for the rsqrt of `x` wrt its input. 
+// +// Specifically, `grad = dy * -0.5 * y^3`, where `y = rsqrt(x)`, and `dy` +// is the corresponding input gradient. +func RsqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { + if scope.Err() != nil { + return } -} - -// CudnnRNNCanonicalToParamsDirection sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNCanonicalToParamsDirection(value string) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["direction"] = value + opspec := tf.OpSpec{ + Type: "RsqrtGrad", + Input: []tf.Input{ + y, dy, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// CudnnRNNCanonicalToParamsDropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNCanonicalToParamsDropout(value float32) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["dropout"] = value - } -} - -// CudnnRNNCanonicalToParamsSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNCanonicalToParamsSeed(value int64) CudnnRNNCanonicalToParamsAttr { - return func(m optionalAttr) { - m["seed"] = value +// Encode audio data using the WAV file format. +// +// This operation will generate a string suitable to be saved out to create a .wav +// audio file. It will be encoded in the 16-bit PCM format. It takes in float +// values in the range -1.0f to 1.0f, and any outside that value will be clamped to +// that range. +// +// `audio` is a 2-D float Tensor of shape `[length, channels]`. +// `sample_rate` is a scalar Tensor holding the rate to use (e.g. 44100). +// +// Arguments: +// audio: 2-D with shape `[length, channels]`. +// sample_rate: Scalar containing the sample frequency. +// +// Returns 0-D. WAV-encoded file contents. 
+func EncodeWav(scope *Scope, audio tf.Output, sample_rate tf.Output) (contents tf.Output) { + if scope.Err() != nil { + return } + opspec := tf.OpSpec{ + Type: "EncodeWav", + Input: []tf.Input{ + audio, sample_rate, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) } -// CudnnRNNCanonicalToParamsSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNCanonicalToParamsSeed2(value int64) CudnnRNNCanonicalToParamsAttr { +// ResourceApplyAdaMaxAttr is an optional argument to ResourceApplyAdaMax. +type ResourceApplyAdaMaxAttr func(optionalAttr) + +// ResourceApplyAdaMaxUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var, m, and v tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAdaMaxUseLocking(value bool) ResourceApplyAdaMaxAttr { return func(m optionalAttr) { - m["seed2"] = value + m["use_locking"] = value } } -// Converts CudnnRNN params from canonical form to usable form. +// Update '*var' according to the AdaMax algorithm. // -// Writes a set of weights into the opaque params buffer so they can be used in -// upcoming training or inferences. +// m_t <- beta1 * m_{t-1} + (1 - beta1) * g +// v_t <- max(beta2 * v_{t-1}, abs(g)) +// variable <- variable - learning_rate / (1 - beta1^t) * m_t / (v_t + epsilon) // -// Note that the params buffer may not be compatible across different GPUs. So any -// save and restoration should be converted to and from the canonical weights and -// biases. +// Arguments: +// var_: Should be from a Variable(). +// m: Should be from a Variable(). +// v: Should be from a Variable(). +// beta1_power: Must be a scalar. +// lr: Scaling factor. Must be a scalar. +// beta1: Momentum factor. Must be a scalar. +// beta2: Momentum factor. Must be a scalar. +// epsilon: Ridge term. Must be a scalar. 
+// grad: The gradient. // -// num_layers: Specifies the number of layers in the RNN model. -// num_units: Specifies the size of the hidden state. -// input_size: Specifies the size of the input state. -// weights: the canonical form of weights that can be used for saving -// and restoration. They are more likely to be compatible across different -// generations. -// biases: the canonical form of biases that can be used for saving -// and restoration. They are more likely to be compatible across different -// generations. -// num_params: number of parameter sets for all layers. -// Each layer may contain multiple parameter sets, with each set consisting of -// a weight matrix and a bias vector. -// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// The actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. -// dir = (direction == bidirectional) ? 2 : 1 -// dropout: dropout probability. When set to 0., dropout is disabled. -// seed: the 1st part of a seed to initialize dropout. -// seed2: the 2nd part of a seed to initialize dropout. -func CudnnRNNCanonicalToParams(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, weights []tf.Output, biases []tf.Output, optional ...CudnnRNNCanonicalToParamsAttr) (params tf.Output) { +// Returns the created operation. +func ResourceApplyAdaMax(scope *Scope, var_ tf.Output, m tf.Output, v tf.Output, beta1_power tf.Output, lr tf.Output, beta1 tf.Output, beta2 tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyAdaMaxAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -25588,60 +25539,54 @@ func CudnnRNNCanonicalToParams(scope *Scope, num_layers tf.Output, num_units tf. 
a(attrs) } opspec := tf.OpSpec{ - Type: "CudnnRNNCanonicalToParams", + Type: "ResourceApplyAdaMax", Input: []tf.Input{ - num_layers, num_units, input_size, tf.OutputList(weights), tf.OutputList(biases), + var_, m, v, beta1_power, lr, beta1, beta2, epsilon, grad, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Creates a dataset containing elements of first component of `input_dataset` having true in the last component. -func FilterByLastComponentDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (output tf.Output) { +// Computes atan of x element-wise. +func Atan(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "FilterByLastComponentDataset", + Type: "Atan", Input: []tf.Input{ - input_dataset, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// SumAttr is an optional argument to Sum. -type SumAttr func(optionalAttr) +// AssertAttr is an optional argument to Assert. +type AssertAttr func(optionalAttr) -// SumKeepDims sets the optional keep_dims attribute to value. +// AssertSummarize sets the optional summarize attribute to value. // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func SumKeepDims(value bool) SumAttr { +// value: Print this many entries of each tensor. +// If not specified, defaults to 3 +func AssertSummarize(value int64) AssertAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["summarize"] = value } } -// Computes the sum of elements across dimensions of a tensor. +// Asserts that the given condition is true. // -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. 
If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// If `condition` evaluates to false, print the list of tensors in `data`. +// `summarize` determines how many entries of the tensors to print. // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// condition: The condition to evaluate. +// data: The tensors to print out when condition is false. // -// Returns The reduced tensor. -func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (output tf.Output) { +// Returns the created operation. +func Assert(scope *Scope, condition tf.Output, data []tf.Output, optional ...AssertAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -25650,464 +25595,340 @@ func Sum(scope *Scope, input tf.Output, axis tf.Output, optional ...SumAttr) (ou a(attrs) } opspec := tf.OpSpec{ - Type: "Sum", + Type: "Assert", Input: []tf.Input{ - input, axis, + condition, tf.OutputList(data), }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// EnterAttr is an optional argument to Enter. -type EnterAttr func(optionalAttr) +// LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingAdagradParametersGradAccumDebug. +type LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr func(optionalAttr) -// EnterIsConstant sets the optional is_constant attribute to value. +// LoadTPUEmbeddingAdagradParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: If true, the output is constant within the child frame. 
-// If not specified, defaults to false -func EnterIsConstant(value bool) EnterAttr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingAdagradParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["is_constant"] = value + m["table_id"] = value } } -// EnterParallelIterations sets the optional parallel_iterations attribute to value. -// -// value: The number of iterations allowed to run in parallel. -// If not specified, defaults to 10 -func EnterParallelIterations(value int64) EnterAttr { +// LoadTPUEmbeddingAdagradParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingAdagradParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["parallel_iterations"] = value + m["table_name"] = value } } -// Creates or finds a child frame, and makes `data` available to the child frame. +// Load Adagrad embedding parameters with debug support. // -// This op is used together with `Exit` to create loops in the graph. -// The unique `frame_name` is used by the `Executor` to identify frames. If -// `is_constant` is true, `output` is a constant in the child frame; otherwise -// it may be changed in the child frame. At most `parallel_iterations` iterations -// are run in parallel in the child frame. +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // // Arguments: -// data: The tensor to be made available to the child frame. -// frame_name: The name of the child frame. +// parameters: Value of parameters used in the Adagrad optimization algorithm. 
+// accumulators: Value of accumulators used in the Adagrad optimization algorithm. +// gradient_accumulators: Value of gradient_accumulators used in the Adagrad optimization algorithm. // -// Returns The same tensor as `data`. -func Enter(scope *Scope, data tf.Output, frame_name string, optional ...EnterAttr) (output tf.Output) { +// +// +// Returns the created operation. +func LoadTPUEmbeddingAdagradParametersGradAccumDebug(scope *Scope, parameters tf.Output, accumulators tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdagradParametersGradAccumDebugAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"frame_name": frame_name} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Enter", + Type: "LoadTPUEmbeddingAdagradParametersGradAccumDebug", Input: []tf.Input{ - data, + parameters, accumulators, gradient_accumulators, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Add all input tensors element wise. -// -// Arguments: -// inputs: Must all be the same size and shape. -func AddN(scope *Scope, inputs []tf.Output) (sum tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AddN", - Input: []tf.Input{ - tf.OutputList(inputs), - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// TryRpcAttr is an optional argument to TryRpc. -type TryRpcAttr func(optionalAttr) - -// TryRpcProtocol sets the optional protocol attribute to value. -// -// value: RPC protocol to use. Empty string means use the default protocol. -// Options include 'grpc'. 
-// If not specified, defaults to "" -func TryRpcProtocol(value string) TryRpcAttr { - return func(m optionalAttr) { - m["protocol"] = value - } -} +// RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr is an optional argument to RetrieveTPUEmbeddingFTRLParametersGradAccumDebug. +type RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr func(optionalAttr) -// TryRpcFailFast sets the optional fail_fast attribute to value. +// RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: `boolean`. If `true` (default), then failures to connect -// (i.e., the server does not immediately respond) cause an RPC failure. -// If not specified, defaults to true -func TryRpcFailFast(value bool) TryRpcAttr { +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableId(value int64) RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["fail_fast"] = value + m["table_id"] = value } } -// TryRpcTimeoutInMs sets the optional timeout_in_ms attribute to value. -// -// value: `int`. If `0` (default), then the kernel will run the RPC -// request and only time out if the RPC deadline passes or the session times out. -// If this value is greater than `0`, then the op will raise an exception if -// the RPC takes longer than `timeout_in_ms`. -// If not specified, defaults to 0 -func TryRpcTimeoutInMs(value int64) TryRpcAttr { +// RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingFTRLParametersGradAccumDebugTableName(value string) RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["timeout_in_ms"] = value + m["table_name"] = value } } -// Perform batches of RPC requests. -// -// This op asynchronously performs either a single RPC request, or a batch -// of requests. 
RPC requests are defined by three main parameters: -// -// - `address` (the host+port or BNS address of the request) -// - `method` (the method name for the request) -// - `request` (the serialized proto string, or vector of strings, -// of the RPC request argument). -// -// For example, if you have an RPC service running on port localhost:2345, -// and its interface is configured with the following proto declaration: -// -// ``` -// service MyService { -// rpc MyMethod(MyRequestProto) returns (MyResponseProto) { -// } -// }; -// ``` -// -// then call this op with arguments: -// -// ``` -// address = "localhost:2345" -// method = "MyService/MyMethod" -// ``` -// -// The `request` tensor is a string tensor representing serialized `MyRequestProto` -// strings; and the output string tensor `response` will have the same shape -// and contain (upon successful completion) corresponding serialized -// `MyResponseProto` strings. -// -// For example, to send a single, empty, `MyRequestProto`, call -// this op with `request = ""`. To send 5 **parallel** empty requests, -// call this op with `request = ["", "", "", "", ""]`. -// -// More generally, one can create a batch of `MyRequestProto` serialized protos -// from regular batched tensors using the `encode_proto` op, and convert -// the response `MyResponseProto` serialized protos to batched tensors -// using the `decode_proto` op. -// -// **NOTE** Working with serialized proto strings is faster than instantiating -// actual proto objects in memory, so no performance degradation is expected -// compared to writing custom kernels for this workflow. -// -// Unlike the standard `Rpc` op, if the connection fails or the remote worker -// returns an error status, this op does **not** reraise the exception. -// Instead, the `status_code` and `status_message` entry for the corresponding RPC -// call is set with the error returned from the RPC call. 
The `response` tensor -// will contain valid response values for those minibatch entries whose RPCs did -// not fail; the rest of the entries will have empty strings. +// Retrieve FTRL embedding parameters with debug support. // -// Arguments: -// address: `0-D` or `1-D`. The address (i.e. host_name:port) of the RPC server. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `method` and `request`. -// method: `0-D` or `1-D`. The method address on the RPC server. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `address` and `request`. -// request: `0-D` or `1-D`. Serialized proto strings: the rpc request argument. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `address` and `method`. +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // -// Returns Same shape as `request`. Serialized proto strings: the rpc responses.Same shape as `request`. Values correspond to tensorflow Status enum codes.Same shape as `request`. Values correspond to Status messages -// returned from the RPC calls. -func TryRpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...TryRpcAttr) (response tf.Output, status_code tf.Output, status_message tf.Output) { +// Returns Parameter parameters updated by the FTRL optimization algorithm.Parameter accumulators updated by the FTRL optimization algorithm.Parameter linears updated by the FTRL optimization algorithm.Parameter gradient_accumulators updated by the FTRL optimization algorithm. 
+func RetrieveTPUEmbeddingFTRLParametersGradAccumDebug(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingFTRLParametersGradAccumDebugAttr) (parameters tf.Output, accumulators tf.Output, linears tf.Output, gradient_accumulators tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TryRpc", + Type: "RetrieveTPUEmbeddingFTRLParametersGradAccumDebug", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) +} + +// A dataset that splits the elements of its input into multiple elements. +func ExperimentalUnbatchDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "ExperimentalUnbatchDataset", Input: []tf.Input{ - address, method, request, + input_dataset, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// InitializeTableFromTextFileV2Attr is an optional argument to InitializeTableFromTextFileV2. -type InitializeTableFromTextFileV2Attr func(optionalAttr) +// StringFormatAttr is an optional argument to StringFormat. +type StringFormatAttr func(optionalAttr) -// InitializeTableFromTextFileV2VocabSize sets the optional vocab_size attribute to value. -// -// value: Number of elements of the file, use -1 if unknown. -// If not specified, defaults to -1 +// StringFormatTemplate sets the optional template attribute to value. 
// -// REQUIRES: value >= -1 -func InitializeTableFromTextFileV2VocabSize(value int64) InitializeTableFromTextFileV2Attr { +// value: A string, the template to format tensor summaries into. +// If not specified, defaults to "%s" +func StringFormatTemplate(value string) StringFormatAttr { return func(m optionalAttr) { - m["vocab_size"] = value + m["template"] = value } } -// InitializeTableFromTextFileV2Delimiter sets the optional delimiter attribute to value. +// StringFormatPlaceholder sets the optional placeholder attribute to value. // -// value: Delimiter to separate fields in a line. -// If not specified, defaults to "\t" -func InitializeTableFromTextFileV2Delimiter(value string) InitializeTableFromTextFileV2Attr { +// value: A string, at each placeholder in the template a subsequent tensor summary will be inserted. +// If not specified, defaults to "%s" +func StringFormatPlaceholder(value string) StringFormatAttr { return func(m optionalAttr) { - m["delimiter"] = value + m["placeholder"] = value } } -// Initializes a table from a text file. +// StringFormatSummarize sets the optional summarize attribute to value. // -// It inserts one key-value pair into the table for each line of the file. -// The key and value is extracted from the whole line content, elements from the -// split line based on `delimiter` or the line number (starting from zero). -// Where to extract the key and value from a line is specified by `key_index` and -// `value_index`. +// value: When formatting the tensor summaries print the first and last summarize entries of each tensor dimension. +// If not specified, defaults to 3 +func StringFormatSummarize(value int64) StringFormatAttr { + return func(m optionalAttr) { + m["summarize"] = value + } +} + +// Formats a string template using a list of tensors. // -// - A value of -1 means use the line number(starting from zero), expects `int64`. -// - A value of -2 means use the whole line content, expects `string`. 
-// - A value >= 0 means use the index (starting at zero) of the split line based -// on `delimiter`. +// Formats a string template using a list of tensors, pretty-printing tensor summaries. // // Arguments: -// table_handle: Handle to a table which will be initialized. -// filename: Filename of a vocabulary text file. -// key_index: Column index in a line to get the table `key` values from. -// value_index: Column index that represents information of a line to get the table -// `value` values from. +// inputs: The list of tensors to format into the placeholder string. // -// Returns the created operation. -func InitializeTableFromTextFileV2(scope *Scope, table_handle tf.Output, filename tf.Output, key_index int64, value_index int64, optional ...InitializeTableFromTextFileV2Attr) (o *tf.Operation) { +// Returns = The resulting string scalar. +func StringFormat(scope *Scope, inputs []tf.Output, optional ...StringFormatAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"key_index": key_index, "value_index": value_index} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "InitializeTableFromTextFileV2", + Type: "StringFormat", Input: []tf.Input{ - table_handle, filename, + tf.OutputList(inputs), }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// MeanAttr is an optional argument to Mean. -type MeanAttr func(optionalAttr) - -// MeanKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func MeanKeepDims(value bool) MeanAttr { - return func(m optionalAttr) { - m["keep_dims"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the mean of elements across dimensions of a tensor. +// Returns true if queue is closed. // -// Reduces `input` along the dimensions given in `axis`. 
Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// This operation returns true if the queue is closed and false if the queue +// is open. // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. -// -// Returns The reduced tensor. -func Mean(scope *Scope, input tf.Output, axis tf.Output, optional ...MeanAttr) (output tf.Output) { +// handle: The handle to a queue. +func QueueIsClosedV2(scope *Scope, handle tf.Output) (is_closed tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Mean", + Type: "QueueIsClosedV2", Input: []tf.Input{ - input, axis, + handle, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ProdAttr is an optional argument to Prod. -type ProdAttr func(optionalAttr) - -// ProdKeepDims sets the optional keep_dims attribute to value. -// -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func ProdKeepDims(value bool) ProdAttr { - return func(m optionalAttr) { - m["keep_dims"] = value +// Computes inverse hyperbolic tangent of x element-wise. +func Atanh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Atanh", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the product of elements across dimensions of a tensor. +// Computes the reverse mode backpropagated gradient of the Cholesky algorithm. // -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. 
+// For an explanation see "Differentiation of the Cholesky algorithm" by +// Iain Murray http://arxiv.org/abs/1602.07527. // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// l: Output of batch Cholesky algorithm l = cholesky(A). Shape is `[..., M, M]`. +// Algorithm depends only on lower triangular part of the innermost matrices of +// this tensor. +// grad: df/dl where f is some scalar function. Shape is `[..., M, M]`. +// Algorithm depends only on lower triangular part of the innermost matrices of +// this tensor. // -// Returns The reduced tensor. -func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) (output tf.Output) { +// Returns Symmetrized version of df/dA . Shape is `[..., M, M]` +func CholeskyGrad(scope *Scope, l tf.Output, grad tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Prod", + Type: "CholeskyGrad", Input: []tf.Input{ - input, axis, + l, grad, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResizeBilinearAttr is an optional argument to ResizeBilinear. -type ResizeBilinearAttr func(optionalAttr) - -// ResizeBilinearAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// Resize `images` to `size` using bilinear interpolation. +// Assigns a new value to a variable. // -// Input images can be of different types but output images are always float. 
+// Any ReadVariableOp with a control dependency on this op is guaranteed to return +// this value or a subsequent newer value of the variable. // // Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. +// resource: handle to the resource in which to store the variable. +// value: the value to set the new tensor to use. // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) { +// Returns the created operation. +func AssignVariableOp(scope *Scope, resource tf.Output, value tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResizeBilinear", + Type: "AssignVariableOp", Input: []tf.Input{ - images, size, + resource, value, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// MaxAttr is an optional argument to Max. -type MaxAttr func(optionalAttr) - -// MaxKeepDims sets the optional keep_dims attribute to value. +// Returns a tensor of ones with the same shape and type as x. // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func MaxKeepDims(value bool) MaxAttr { - return func(m optionalAttr) { - m["keep_dims"] = value +// Arguments: +// x: a tensor of type T. +// +// Returns a tensor of the same shape and type as x but filled with ones. +func OnesLike(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "OnesLike", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the maximum of elements across dimensions of a tensor. 
+// The gradient of SparseFillEmptyRows. // -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. +// Takes vectors reverse_index_map, shaped `[N]`, and grad_values, +// shaped `[N_full]`, where `N_full >= N` and copies data into either +// `d_values` or `d_default_value`. Here `d_values` is shaped `[N]` and +// `d_default_value` is a scalar. +// +// d_values[j] = grad_values[reverse_index_map[j]] +// d_default_value = sum_{k : 0 .. N_full - 1} ( +// grad_values[k] * 1{k not in reverse_index_map}) // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// reverse_index_map: 1-D. The reverse index map from SparseFillEmptyRows. +// grad_values: 1-D. The gradients from backprop. // -// Returns The reduced tensor. -func Max(scope *Scope, input tf.Output, axis tf.Output, optional ...MaxAttr) (output tf.Output) { +// Returns 1-D. The backprop into values.0-D. The backprop into default_value. +func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_values tf.Output) (d_values tf.Output, d_default_value tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Max", + Type: "SparseFillEmptyRowsGrad", Input: []tf.Input{ - input, axis, + reverse_index_map, grad_values, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Creates a dataset that contains the unique elements of `input_dataset`. -func ExperimentalUniqueDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Creates a dataset that zips together `input_datasets`. 
+func ZipDataset(scope *Scope, input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ExperimentalUniqueDataset", + Type: "ZipDataset", Input: []tf.Input{ - input_dataset, + tf.OutputList(input_datasets), }, Attrs: attrs, } @@ -26115,668 +25936,610 @@ func ExperimentalUniqueDataset(scope *Scope, input_dataset tf.Output, output_typ return op.Output(0) } -// ArgMinAttr is an optional argument to ArgMin. -type ArgMinAttr func(optionalAttr) +// LoadTPUEmbeddingAdagradParametersAttr is an optional argument to LoadTPUEmbeddingAdagradParameters. +type LoadTPUEmbeddingAdagradParametersAttr func(optionalAttr) -// ArgMinOutputType sets the optional output_type attribute to value. -// If not specified, defaults to DT_INT64 -func ArgMinOutputType(value tf.DataType) ArgMinAttr { +// LoadTPUEmbeddingAdagradParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func LoadTPUEmbeddingAdagradParametersTableId(value int64) LoadTPUEmbeddingAdagradParametersAttr { return func(m optionalAttr) { - m["output_type"] = value + m["table_id"] = value } } -// Returns the index with the smallest value across dimensions of a tensor. +// LoadTPUEmbeddingAdagradParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingAdagradParametersTableName(value string) LoadTPUEmbeddingAdagradParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load Adagrad embedding parameters. // -// Note that in case of ties the identity of the return value is not guaranteed. +// An op that loads optimization parameters into HBM for embedding. 
Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // // Arguments: +// parameters: Value of parameters used in the Adagrad optimization algorithm. +// accumulators: Value of accumulators used in the Adagrad optimization algorithm. // -// dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. -// Describes which dimension of the input Tensor to reduce across. For vectors, -// use dimension = 0. -func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) { +// +// +// Returns the created operation. +func LoadTPUEmbeddingAdagradParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingAdagradParametersAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ArgMin", + Type: "LoadTPUEmbeddingAdagradParameters", Input: []tf.Input{ - input, dimension, + parameters, accumulators, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Convert the quantized 'input' tensor into a lower-precision 'output', using the -// -// output range specified with 'requested_output_min' and 'requested_output_max'. -// -// [input_min, input_max] are scalar floats that specify the range for the float -// interpretation of the 'input' data. For example, if input_min is -1.0f and -// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 -// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. +// Strip leading and trailing whitespaces from the Tensor. 
// // Arguments: +// input: A string `Tensor` of any shape. // -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// requested_output_min: The float value that the minimum quantized output value represents. -// requested_output_max: The float value that the maximum quantized output value represents. -// out_type: The type of the output. Should be a lower bit depth than Tinput. -// -// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output. -func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { +// Returns A string `Tensor` of the same shape as the input. +func StringStrip(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"out_type": out_type} opspec := tf.OpSpec{ - Type: "Requantize", + Type: "StringStrip", Input: []tf.Input{ - input, input_min, input_max, requested_output_min, requested_output_max, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Creates a dataset that emits the lines of one or more text files. +// Converts each string in the input Tensor to its hash mod by a number of buckets. +// +// The hash function is deterministic on the content of the string within the +// process. The hash function is a keyed hash function, where attribute `key` +// defines the key of the hash function. `key` is an array of 2 elements. +// +// A strong hash is important when inputs may be malicious, e.g. URLs with +// additional components. 
Adversaries could try to make their inputs hash to the +// same bucket for a denial-of-service attack or to skew the results. A strong +// hash prevents this by making it difficult, if not infeasible, to compute inputs +// that hash to the same bucket. This comes at a cost of roughly 4x higher compute +// time than `tf.string_to_hash_bucket_fast`. // // Arguments: -// filenames: A scalar or a vector containing the name(s) of the file(s) to be -// read. -// compression_type: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// buffer_size: A scalar containing the number of bytes to buffer. -func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { +// input: The strings to assign a hash bucket. +// num_buckets: The number of buckets. +// key: The key for the keyed hash function passed as a list of two uint64 +// elements. +// +// Returns A Tensor of the same shape as the input `string_tensor`. +func StringToHashBucketStrong(scope *Scope, input tf.Output, num_buckets int64, key []int64) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_buckets": num_buckets, "key": key} opspec := tf.OpSpec{ - Type: "TextLineDataset", + Type: "StringToHashBucketStrong", Input: []tf.Input{ - filenames, compression_type, buffer_size, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the sum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \sum_j data_j\\) where sum is over `j` such -// that `segment_ids[j] == i`. -// -// If the sum is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// For example: +// StringLengthAttr is an optional argument to StringLength. +type StringLengthAttr func(optionalAttr) + +// StringLengthUnit sets the optional unit attribute to value. // -// ``` -// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) -// tf.segment_sum(c, tf.constant([0, 0, 1])) -// # ==> [[5, 5, 5, 5], -// # [5, 6, 7, 8]] -// ``` +// value: The unit that is counted to compute string length. One of: `"BYTE"` (for +// the number of bytes in each string) or `"UTF8_CHAR"` (for the number of UTF-8 +// encoded Unicode code points in each string). Results are undefined +// if `unit=UTF8_CHAR` and the `input` strings do not contain structurally +// valid UTF-8. +// If not specified, defaults to "BYTE" +func StringLengthUnit(value string) StringLengthAttr { + return func(m optionalAttr) { + m["unit"] = value + } +} + +// String lengths of `input`. // +// Computes the length of each string given in the input tensor. // // Arguments: +// input: The string for which to compute the length. // -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// Returns Integer tensor that has the same shape as `input`. The output contains the +// element-wise string lengths of `input`. +func StringLength(scope *Scope, input tf.Output, optional ...StringLengthAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SegmentSum", + Type: "StringLength", Input: []tf.Input{ - data, segment_ids, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the mean along segments of a tensor. 
-// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is -// over `j` such that `segment_ids[j] == i` and `N` is the total number of -// values summed. -// -// If the mean is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// For example: -// -// ``` -// c = tf.constant([[1.0,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) -// tf.segment_mean(c, tf.constant([0, 0, 1])) -// # ==> [[2.5, 2.5, 2.5, 2.5], -// # [5, 6, 7, 8]] -// ``` -// +// Performs gradient updates of embedding tables. // // Arguments: +// inputs: A TensorList of gradients with which to update embedding tables. +// This argument has the same length and shapes as the return value of +// RecvTPUEmbeddingActivations, but contains gradients of the model's loss +// with respect to the embedding activations. The embedding tables are updated +// from these gradients via the optimizer specified in the TPU embedding +// configuration given to tpu.initialize_system. +// learning_rates: A TensorList of float32 scalars, one for each dynamic learning +// rate tag: see the comments in +// //third_party/tensorflow/core/protobuf/tpu/optimization_parameters.proto. +// Multiple tables can share the same dynamic learning rate tag as specified +// in the configuration. If the learning rates for all tables are constant, +// this list should be empty. +// config: Serialized TPUEmbeddingConfiguration proto. // -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. +// Returns the created operation. +func SendTPUEmbeddingGradients(scope *Scope, inputs []tf.Output, learning_rates []tf.Output, config string) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"config": config} + opspec := tf.OpSpec{ + Type: "SendTPUEmbeddingGradients", + Input: []tf.Input{ + tf.OutputList(inputs), tf.OutputList(learning_rates), + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Computes numerical negative value element-wise. // -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. 
-func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// I.e., \\(y = -x\\). +func Neg(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SegmentMean", + Type: "Neg", Input: []tf.Input{ - data, segment_ids, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the minimum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such -// that `segment_ids[j] == i`. -// -// If the min is empty for a given segment ID `i`, `output[i] = 0`. -// -//
-// -//
-// -// For example: +// Receives a tensor value broadcast from another device. +func CollectiveBcastRecv(scope *Scope, T tf.DataType, group_size int64, group_key int64, instance_key int64, shape tf.Shape) (data tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"T": T, "group_size": group_size, "group_key": group_key, "instance_key": instance_key, "shape": shape} + opspec := tf.OpSpec{ + Type: "CollectiveBcastRecv", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Decode web-safe base64-encoded strings. // -// ``` -// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) -// tf.segment_min(c, tf.constant([0, 0, 1])) -// # ==> [[1, 2, 2, 1], -// # [5, 6, 7, 8]] -// ``` +// Input may or may not have padding at the end. See EncodeBase64 for padding. +// Web-safe means that input must use - and _ instead of + and /. // // Arguments: +// input: Base64 strings to decode. // -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// Returns Decoded strings. +func DecodeBase64(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SegmentMin", + Type: "DecodeBase64", Input: []tf.Input{ - data, segment_ids, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the sum along segments of a tensor. +// SubstrAttr is an optional argument to Substr. +type SubstrAttr func(optionalAttr) + +// SubstrUnit sets the optional unit attribute to value. // -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. 
+// value: The unit that is used to create the substring. One of: `"BYTE"` (for +// defining position and length by bytes) or `"UTF8_CHAR"` (for the UTF-8 +// encoded Unicode code points). The default is `"BYTE"`. Results are undefined if +// `unit=UTF8_CHAR` and the `input` strings do not contain structurally valid +// UTF-8. +// If not specified, defaults to "BYTE" +func SubstrUnit(value string) SubstrAttr { + return func(m optionalAttr) { + m["unit"] = value + } +} + +// Return substrings from `Tensor` of strings. // -// Computes a tensor such that -// \\(output[i] = \sum_{j...} data[j...]\\) where the sum is over tuples `j...` such -// that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` -// need not be sorted and need not cover all values in the full -// range of valid values. +// For each string in the input `Tensor`, creates a substring starting at index +// `pos` with a total length of `len`. // -// If the sum is empty for a given segment ID `i`, `output[i] = 0`. -// If the given segment ID `i` is negative, the value is dropped and will not be -// added to the sum of the segment. +// If `len` defines a substring that would extend beyond the length of the input +// string, then as many characters as possible are used. // -// `num_segments` should equal the number of distinct segment IDs. +// A negative `pos` indicates distance within the string backwards from the end. // -//
-// -//
+// If `pos` specifies an index which is out of range for any of the input strings, +// then an `InvalidArgumentError` is thrown. // -// ``` python -// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) -// tf.unsorted_segment_sum(c, tf.constant([0, 1, 0]), num_segments=2) -// # ==> [[ 5, 5, 5, 5], -// # [5, 6, 7, 8]] -// ``` +// `pos` and `len` must have the same shape, otherwise a `ValueError` is thrown on +// Op creation. // +// *NOTE*: `Substr` supports broadcasting up to two dimensions. More about +// broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) // -// Arguments: +// --- // -// segment_ids: A tensor whose shape is a prefix of `data.shape`. +// Examples // +// Using scalar `pos` and `len`: // -// Returns Has same shape as data, except for the first `segment_ids.rank` -// dimensions, which are replaced with a single dimension which has size -// `num_segments`. -func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "UnsortedSegmentSum", - Input: []tf.Input{ - data, segment_ids, num_segments, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the product along segments of a tensor. +// ```python +// input = [b'Hello', b'World'] +// position = 1 +// length = 3 // -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. +// output = [b'ell', b'orl'] +// ``` // -// This operator is similar to the unsorted segment sum operator found -// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). 
-// Instead of computing the sum over segments, it computes the product of all -// entries belonging to a segment such that: +// Using `pos` and `len` with same shape as `input`: // -// \\(output_i = \prod_{j...} data[j...]\\) where the product is over tuples -// `j...` such that `segment_ids[j...] == i`. +// ```python +// input = [[b'ten', b'eleven', b'twelve'], +// [b'thirteen', b'fourteen', b'fifteen'], +// [b'sixteen', b'seventeen', b'eighteen']] +// position = [[1, 2, 3], +// [1, 2, 3], +// [1, 2, 3]] +// length = [[2, 3, 4], +// [4, 3, 2], +// [5, 5, 5]] // -// For example: +// output = [[b'en', b'eve', b'lve'], +// [b'hirt', b'urt', b'te'], +// [b'ixtee', b'vente', b'hteen']] +// ``` +// +// Broadcasting `pos` and `len` onto `input`: // -// ``` python -// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) -// tf.unsorted_segment_prod(c, tf.constant([0, 1, 0]), num_segments=2) -// # ==> [[ 4, 6, 6, 4], -// # [5, 6, 7, 8]] // ``` +// input = [[b'ten', b'eleven', b'twelve'], +// [b'thirteen', b'fourteen', b'fifteen'], +// [b'sixteen', b'seventeen', b'eighteen'], +// [b'nineteen', b'twenty', b'twentyone']] +// position = [1, 2, 3] +// length = [1, 2, 3] // -// If there is no entry for a given segment ID `i`, it outputs 1. +// output = [[b'e', b'ev', b'lve'], +// [b'h', b'ur', b'tee'], +// [b'i', b've', b'hte'], +// [b'i', b'en', b'nty']] +// ``` // -// If the given segment ID `i` is negative, then the corresponding value is -// dropped, and will not be included in the result. +// Broadcasting `input` onto `pos` and `len`: // -// Arguments: +// ``` +// input = b'thirteen' +// position = [1, 5, 7] +// length = [3, 2, 1] // -// segment_ids: A tensor whose shape is a prefix of `data.shape`. 
+// output = [b'hir', b'ee', b'n'] +// ``` // +// Arguments: +// input: Tensor of strings +// pos: Scalar defining the position of first character in each substring +// len: Scalar defining the number of characters to include in each substring // -// Returns Has same shape as data, except for the first `segment_ids.rank` -// dimensions, which are replaced with a single dimension which has size -// `num_segments`. -func UnsortedSegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { +// Returns Tensor of substrings +func Substr(scope *Scope, input tf.Output, pos tf.Output, len tf.Output, optional ...SubstrAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "UnsortedSegmentProd", + Type: "Substr", Input: []tf.Input{ - data, segment_ids, num_segments, + input, pos, len, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the mean along sparse segments of a tensor. -// -// See `tf.sparse.segment_sum` for usage examples. +// Exits the current frame to its parent frame. // -// Like `SegmentMean`, but `segment_ids` can have rank less than `data`'s first -// dimension, selecting a subset of dimension 0, specified by `indices`. +// Exit makes its input `data` available to the parent frame. // // Arguments: +// data: The tensor to be made available to the parent frame. // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentMean(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { +// Returns The same tensor as `data`. 
+func Exit(scope *Scope, data tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseSegmentMean", + Type: "Exit", Input: []tf.Input{ - data, indices, segment_ids, + data, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Deserializes a serialized tree ensemble config and replaces current tree -// -// ensemble. -// -// Arguments: -// tree_ensemble_handle: Handle to the tree ensemble. -// stamp_token: Token to use as the new value of the resource stamp. -// tree_ensemble_serialized: Serialized proto of the ensemble. +// RetrieveTPUEmbeddingProximalAdagradParametersAttr is an optional argument to RetrieveTPUEmbeddingProximalAdagradParameters. +type RetrieveTPUEmbeddingProximalAdagradParametersAttr func(optionalAttr) + +// RetrieveTPUEmbeddingProximalAdagradParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// Returns the created operation. -func BoostedTreesDeserializeEnsemble(scope *Scope, tree_ensemble_handle tf.Output, stamp_token tf.Output, tree_ensemble_serialized tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "BoostedTreesDeserializeEnsemble", - Input: []tf.Input{ - tree_ensemble_handle, stamp_token, tree_ensemble_serialized, - }, - } - return scope.AddOperation(opspec) -} - -// Transforms a tf.Example proto (as a string) into typed tensors. -// -// Arguments: -// serialized: A vector containing a batch of binary serialized Example protos. -// dense_defaults: A list of Tensors (some may be empty), whose length matches -// the length of `dense_keys`. dense_defaults[j] provides default values -// when the example's feature_map lacks dense_key[j]. If an empty Tensor is -// provided for dense_defaults[j], then the Feature dense_keys[j] is required. -// The input type is inferred from dense_defaults[j], even when it's empty. 
-// If dense_defaults[j] is not empty, and dense_shapes[j] is fully defined, -// then the shape of dense_defaults[j] must match that of dense_shapes[j]. -// If dense_shapes[j] has an undefined major dimension (variable strides dense -// feature), dense_defaults[j] must contain a single element: -// the padding element. -// num_sparse: The number of sparse features to be parsed from the example. This -// must match the lengths of `sparse_keys` and `sparse_types`. -// sparse_keys: A list of `num_sparse` strings. -// The keys expected in the Examples' features associated with sparse values. -// dense_keys: The keys expected in the Examples' features associated with dense -// values. -// sparse_types: A list of `num_sparse` types; the data types of data in each -// Feature given in sparse_keys. -// Currently the ParseSingleExample op supports DT_FLOAT (FloatList), -// DT_INT64 (Int64List), and DT_STRING (BytesList). -// dense_shapes: The shapes of data in each Feature given in dense_keys. -// The length of this list must match the length of `dense_keys`. The -// number of elements in the Feature corresponding to dense_key[j] must -// always equal dense_shapes[j].NumEntries(). If dense_shapes[j] == -// (D0, D1, ..., DN) then the shape of output Tensor dense_values[j] -// will be (D0, D1, ..., DN): In the case dense_shapes[j] = (-1, D1, -// ..., DN), the shape of the output Tensor dense_values[j] will be (M, -// D1, .., DN), where M is the number of blocks of elements of length -// D1 * .... * DN, in the input. 
-func ParseSingleExample(scope *Scope, serialized tf.Output, dense_defaults []tf.Output, num_sparse int64, sparse_keys []string, dense_keys []string, sparse_types []tf.DataType, dense_shapes []tf.Shape) (sparse_indices []tf.Output, sparse_values []tf.Output, sparse_shapes []tf.Output, dense_values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_sparse": num_sparse, "sparse_keys": sparse_keys, "dense_keys": dense_keys, "sparse_types": sparse_types, "dense_shapes": dense_shapes} - opspec := tf.OpSpec{ - Type: "ParseSingleExample", - Input: []tf.Input{ - serialized, tf.OutputList(dense_defaults), - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if sparse_indices, idx, err = makeOutputList(op, idx, "sparse_indices"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - if sparse_values, idx, err = makeOutputList(op, idx, "sparse_values"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - if sparse_shapes, idx, err = makeOutputList(op, idx, "sparse_shapes"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - if dense_values, idx, err = makeOutputList(op, idx, "dense_values"); err != nil { - scope.UpdateErr("ParseSingleExample", err) - return - } - return sparse_indices, sparse_values, sparse_shapes, dense_values -} - -// WholeFileReaderV2Attr is an optional argument to WholeFileReaderV2. -type WholeFileReaderV2Attr func(optionalAttr) - -// WholeFileReaderV2Container sets the optional container attribute to value. -// -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. 
-// If not specified, defaults to "" -func WholeFileReaderV2Container(value string) WholeFileReaderV2Attr { - return func(m optionalAttr) { - m["container"] = value +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingProximalAdagradParametersTableId(value int64) RetrieveTPUEmbeddingProximalAdagradParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value } } -// WholeFileReaderV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. +// RetrieveTPUEmbeddingProximalAdagradParametersTableName sets the optional table_name attribute to value. // If not specified, defaults to "" -func WholeFileReaderV2SharedName(value string) WholeFileReaderV2Attr { +func RetrieveTPUEmbeddingProximalAdagradParametersTableName(value string) RetrieveTPUEmbeddingProximalAdagradParametersAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["table_name"] = value } } -// A Reader that outputs the entire contents of a file as a value. +// Retrieve proximal Adagrad embedding parameters. // -// To use, enqueue filenames in a Queue. The output of ReaderRead will -// be a filename (key) and the contents of that file (value). +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. // -// Returns The handle to reference the Reader. -func WholeFileReaderV2(scope *Scope, optional ...WholeFileReaderV2Attr) (reader_handle tf.Output) { +// Returns Parameter parameters updated by the proximal Adagrad optimization algorithm.Parameter accumulators updated by the proximal Adagrad optimization algorithm. 
+func RetrieveTPUEmbeddingProximalAdagradParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingProximalAdagradParametersAttr) (parameters tf.Output, accumulators tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "WholeFileReaderV2", + Type: "RetrieveTPUEmbeddingProximalAdagradParameters", Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Pop the element at the top of the stack. +// Produce a string tensor that encodes the state of a Reader. // -// Arguments: -// handle: The handle to a stack. -// elem_type: The type of the elem that is popped. +// Not all Readers support being serialized, so this can produce an +// Unimplemented error. // -// Returns The tensor that is popped from the top of the stack. -func StackPopV2(scope *Scope, handle tf.Output, elem_type tf.DataType) (elem tf.Output) { +// Arguments: +// reader_handle: Handle to a Reader. +func ReaderSerializeStateV2(scope *Scope, reader_handle tf.Output) (state tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"elem_type": elem_type} opspec := tf.OpSpec{ - Type: "StackPopV2", + Type: "ReaderSerializeStateV2", Input: []tf.Input{ - handle, + reader_handle, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes hyperbolic cosine of x element-wise. -func Cosh(scope *Scope, x tf.Output) (y tf.Output) { +// Returns the number of tensors in the input tensor list. 
+// +// input_handle: the input list +// length: the number of tensors in the list +func TensorListLength(scope *Scope, input_handle tf.Output) (length tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Cosh", + Type: "TensorListLength", Input: []tf.Input{ - x, + input_handle, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the mean along sparse segments of a tensor. -// -// Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is -// misisng, the `output` tensor at that position will be zeroed. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. +// Creates a dataset with a range of values. Corresponds to python's xrange. // // Arguments: +// start: corresponds to start in python's xrange(). +// stop: corresponds to stop in python's xrange(). +// step: corresponds to step in python's xrange(). // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// num_segments: Should equal the number of distinct segment IDs. // -// Returns Has same shape as data, except for dimension 0 which has size -// `num_segments`. 
-func SparseSegmentMeanWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { +func RangeDataset(scope *Scope, start tf.Output, stop tf.Output, step tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "SparseSegmentMeanWithNumSegments", + Type: "RangeDataset", Input: []tf.Input{ - data, indices, segment_ids, num_segments, + start, stop, step, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// CudnnRNNParamsSizeAttr is an optional argument to CudnnRNNParamsSize. -type CudnnRNNParamsSizeAttr func(optionalAttr) - -// CudnnRNNParamsSizeRnnMode sets the optional rnn_mode attribute to value. -// If not specified, defaults to "lstm" -func CudnnRNNParamsSizeRnnMode(value string) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["rnn_mode"] = value +// Computes inverse hyperbolic sine of x element-wise. +func Asinh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return } -} - -// CudnnRNNParamsSizeInputMode sets the optional input_mode attribute to value. -// If not specified, defaults to "linear_input" -func CudnnRNNParamsSizeInputMode(value string) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["input_mode"] = value + opspec := tf.OpSpec{ + Type: "Asinh", + Input: []tf.Input{ + x, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// CudnnRNNParamsSizeDirection sets the optional direction attribute to value. -// If not specified, defaults to "unidirectional" -func CudnnRNNParamsSizeDirection(value string) CudnnRNNParamsSizeAttr { - return func(m optionalAttr) { - m["direction"] = value - } -} +// UnicodeTranscodeAttr is an optional argument to UnicodeTranscode. 
+type UnicodeTranscodeAttr func(optionalAttr) -// CudnnRNNParamsSizeDropout sets the optional dropout attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsSizeDropout(value float32) CudnnRNNParamsSizeAttr { +// UnicodeTranscodeErrors sets the optional errors attribute to value. +// +// value: Error handling policy when there is invalid formatting found in the input. +// The value of 'strict' will cause the operation to produce a InvalidArgument +// error on any invalid input formatting. A value of 'replace' (the default) will +// cause the operation to replace any invalid formatting in the input with the +// `replacement_char` codepoint. A value of 'ignore' will cause the operation to +// skip any invalid formatting in the input and produce no corresponding output +// character. +// If not specified, defaults to "replace" +func UnicodeTranscodeErrors(value string) UnicodeTranscodeAttr { return func(m optionalAttr) { - m["dropout"] = value + m["errors"] = value } } -// CudnnRNNParamsSizeSeed sets the optional seed attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsSizeSeed(value int64) CudnnRNNParamsSizeAttr { +// UnicodeTranscodeReplacementChar sets the optional replacement_char attribute to value. +// +// value: The replacement character codepoint to be used in place of any invalid +// formatting in the input when `errors='replace'`. Any valid unicode codepoint may +// be used. The default value is the default unicode replacement character is +// 0xFFFD or U+65533.) +// +// Note that for UTF-8, passing a replacement character expressible in 1 byte, such +// as ' ', will preserve string alignment to the source since invalid bytes will be +// replaced with a 1-byte replacement. For UTF-16-BE and UTF-16-LE, any 1 or 2 byte +// replacement character will preserve byte alignment to the source. 
+// If not specified, defaults to 65533 +func UnicodeTranscodeReplacementChar(value int64) UnicodeTranscodeAttr { return func(m optionalAttr) { - m["seed"] = value + m["replacement_char"] = value } } -// CudnnRNNParamsSizeSeed2 sets the optional seed2 attribute to value. -// If not specified, defaults to 0 -func CudnnRNNParamsSizeSeed2(value int64) CudnnRNNParamsSizeAttr { +// UnicodeTranscodeReplaceControlCharacters sets the optional replace_control_characters attribute to value. +// +// value: Whether to replace the C0 control characters (00-1F) with the +// `replacement_char`. Default is false. +// If not specified, defaults to false +func UnicodeTranscodeReplaceControlCharacters(value bool) UnicodeTranscodeAttr { return func(m optionalAttr) { - m["seed2"] = value + m["replace_control_characters"] = value } } -// Computes size of weights that can be used by a Cudnn RNN model. +// Transcode the input text from a source encoding to a destination encoding. // -// Return the params size that can be used by the Cudnn RNN model. Subsequent -// weight allocation and initialization should use this size. +// The input is a string tensor of any shape. The output is a string tensor of +// the same shape containing the transcoded strings. Output strings are always +// valid unicode. If the input contains invalid encoding positions, the +// `errors` attribute sets the policy for how to deal with them. If the default +// error-handling policy is used, invalid formatting will be substituted in the +// output by the `replacement_char`. If the errors policy is to `ignore`, any +// invalid encoding positions in the input are skipped and not included in the +// output. If it set to `strict` then any invalid formatting will result in an +// InvalidArgument error. // -// num_layers: Specifies the number of layers in the RNN model. -// num_units: Specifies the size of the hidden state. -// input_size: Specifies the size of the input state. 
-// rnn_mode: Indicates the type of the RNN model. -// input_mode: Indicate whether there is a linear projection between the input and -// The actual computation before the first layer. 'skip_input' is only allowed -// when input_size == num_units; 'auto_select' implies 'skip_input' when -// input_size == num_units; otherwise, it implies 'linear_input'. -// direction: Indicates whether a bidirectional model will be used. -// dir = (direction == bidirectional) ? 2 : 1 -// dropout: dropout probability. When set to 0., dropout is disabled. -// seed: the 1st part of a seed to initialize dropout. -// seed2: the 2nd part of a seed to initialize dropout. -// params_size: The size of the params buffer that should be allocated and -// initialized for this RNN model. Note that this params buffer may not be -// compatible across GPUs. Please use CudnnRNNParamsWeights and -// CudnnRNNParamsBiases to save and restore them in a way that is compatible -// across different runs. -func CudnnRNNParamsSize(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, T tf.DataType, S tf.DataType, optional ...CudnnRNNParamsSizeAttr) (params_size tf.Output) { +// This operation can be used with `output_encoding = input_encoding` to enforce +// correct formatting for inputs even if they are already in the desired encoding. +// +// If the input is prefixed by a Byte Order Mark needed to determine encoding +// (e.g. if the encoding is UTF-16 and the BOM indicates big-endian), then that +// BOM will be consumed and not emitted into the output. If the input encoding +// is marked with an explicit endianness (e.g. UTF-16-BE), then the BOM is +// interpreted as a non-breaking-space and is preserved in the output (including +// always for UTF-8). +// +// The end result is that if the input is marked as an explicit endianness the +// transcoding is faithful to all codepoints in the source. 
If it is not marked +// with an explicit endianness, the BOM is not considered part of the string itself +// but as metadata, and so is not preserved in the output. +// +// Arguments: +// input: The text to be processed. Can have any shape. +// input_encoding: Text encoding of the input strings. This is any of the encodings supported +// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. +// output_encoding: The unicode encoding to use in the output. Must be one of +// `"UTF-8", "UTF-16-BE", "UTF-32-BE"`. Multi-byte encodings will be big-endian. +// +// Returns A string tensor containing unicode text encoded using `output_encoding`. +func UnicodeTranscode(scope *Scope, input tf.Output, input_encoding string, output_encoding string, optional ...UnicodeTranscodeAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"T": T, "S": S} + attrs := map[string]interface{}{"input_encoding": input_encoding, "output_encoding": output_encoding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "CudnnRNNParamsSize", + Type: "UnicodeTranscode", Input: []tf.Input{ - num_layers, num_units, input_size, + input, }, Attrs: attrs, } @@ -26784,240 +26547,397 @@ func CudnnRNNParamsSize(scope *Scope, num_layers tf.Output, num_units tf.Output, return op.Output(0) } -// Computes gradients for SparseSegmentMean. +// UnicodeDecodeAttr is an optional argument to UnicodeDecode. +type UnicodeDecodeAttr func(optionalAttr) + +// UnicodeDecodeErrors sets the optional errors attribute to value. // -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. +// value: Error handling policy when there is invalid formatting found in the input. +// The value of 'strict' will cause the operation to produce a InvalidArgument +// error on any invalid input formatting. 
A value of 'replace' (the default) will +// cause the operation to replace any invalid formatting in the input with the +// `replacement_char` codepoint. A value of 'ignore' will cause the operation to +// skip any invalid formatting in the input and produce no corresponding output +// character. +// If not specified, defaults to "replace" +func UnicodeDecodeErrors(value string) UnicodeDecodeAttr { + return func(m optionalAttr) { + m["errors"] = value + } +} + +// UnicodeDecodeReplacementChar sets the optional replacement_char attribute to value. +// +// value: The replacement character codepoint to be used in place of any invalid +// formatting in the input when `errors='replace'`. Any valid unicode codepoint may +// be used. The default value is the default unicode replacement character is +// 0xFFFD or U+65533.) +// If not specified, defaults to 65533 +func UnicodeDecodeReplacementChar(value int64) UnicodeDecodeAttr { + return func(m optionalAttr) { + m["replacement_char"] = value + } +} + +// UnicodeDecodeReplaceControlCharacters sets the optional replace_control_characters attribute to value. +// +// value: Whether to replace the C0 control characters (00-1F) with the +// `replacement_char`. Default is false. +// If not specified, defaults to false +func UnicodeDecodeReplaceControlCharacters(value bool) UnicodeDecodeAttr { + return func(m optionalAttr) { + m["replace_control_characters"] = value + } +} + +// Decodes each string in `input` into a sequence of Unicode code points. +// +// The character codepoints for all strings are returned using a single vector +// `char_values`, with strings expanded to characters in row-major order. +// +// The `row_splits` tensor indicates where the codepoints for +// each input string begin and end within the `char_values` tensor. +// In particular, the values for the `i`th +// string (in row-major order) are stored in the slice +// `[row_splits[i]:row_splits[i+1]]`. 
Thus: +// +// * `char_values[row_splits[i]+j]` is the Unicode codepoint for the `j`th +// character in the `i`th string (in row-major order). +// * `row_splits[i+1] - row_splits[i]` is the number of characters in the `i`th +// string (in row-major order). // // Arguments: -// grad: gradient propagated to the SparseSegmentMean op. -// indices: indices passed to the corresponding SparseSegmentMean op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. -func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { +// input: The text to be decoded. Can have any shape. Note that the output is flattened +// to a vector of char values. +// input_encoding: Text encoding of the input strings. This is any of the encodings supported +// by ICU ucnv algorithmic converters. Examples: `"UTF-16", "US ASCII", "UTF-8"`. +// +// Returns A 1D int32 tensor containing the row splits.A 1D int32 Tensor containing the decoded codepoints. +func UnicodeDecode(scope *Scope, input tf.Output, input_encoding string, optional ...UnicodeDecodeAttr) (row_splits tf.Output, char_values tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"input_encoding": input_encoding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseSegmentMeanGrad", + Type: "UnicodeDecode", Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, + input, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. -// -// N is the size of the segment being reduced. +// Adds up a SparseTensor and a dense Tensor, using these special rules: // -// See `tf.sparse.segment_sum` for usage examples. 
+// (1) Broadcasts the dense side to have the same shape as the sparse side, if +// eligible; +// (2) Then, only the dense values pointed to by the indices of the SparseTensor +// participate in the cwise addition. // +// By these rules, the result is a logical SparseTensor with exactly the same +// indices and shape, but possibly with different non-zero values. The output of +// this Op is the resultant non-zero values. // // Arguments: +// sp_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, possibly not in canonical ordering. +// sp_values: 1-D. `N` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// dense: `R`-D. The dense Tensor operand. // -// indices: A 1-D tensor. Has same rank as `segment_ids`. -// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { +// Returns 1-D. The `N` values that are operated on. +func SparseDenseCwiseAdd(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output, dense tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtN", + Type: "SparseDenseCwiseAdd", Input: []tf.Input{ - data, indices, segment_ids, + sp_indices, sp_values, sp_shape, dense, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Compute the upper regularized incomplete Gamma function `Q(a, x)`. +// ResourceApplyRMSPropAttr is an optional argument to ResourceApplyRMSProp. +type ResourceApplyRMSPropAttr func(optionalAttr) + +// ResourceApplyRMSPropUseLocking sets the optional use_locking attribute to value. 
// -// The upper regularized incomplete Gamma function is defined as: +// value: If `True`, updating of the var, ms, and mom tensors is protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyRMSPropUseLocking(value bool) ResourceApplyRMSPropAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the RMSProp algorithm. // -// \\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\) +// Note that in dense implementation of this algorithm, ms and mom will +// update even if the grad is zero, but in this sparse implementation, ms +// and mom will not update in iterations during which the grad is zero. // -// where +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon) // -// \\(Gamma(a, x) = int_{x}^{\infty} t^{a-1} exp(-t) dt\\) +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom // -// is the upper incomplete Gama function. +// Arguments: +// var_: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// rho: Decay rate. Must be a scalar. // -// Note, above `P(a, x)` (`Igamma`) is the lower regularized complete -// Gamma function. -func Igammac(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { +// epsilon: Ridge term. Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. 
+func ResourceApplyRMSProp(scope *Scope, var_ tf.Output, ms tf.Output, mom tf.Output, lr tf.Output, rho tf.Output, momentum tf.Output, epsilon tf.Output, grad tf.Output, optional ...ResourceApplyRMSPropAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Igammac", + Type: "ResourceApplyRMSProp", Input: []tf.Input{ - a, x, + var_, ms, mom, lr, rho, momentum, epsilon, grad, }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// StatelessTruncatedNormalAttr is an optional argument to StatelessTruncatedNormal. +type StatelessTruncatedNormalAttr func(optionalAttr) + +// StatelessTruncatedNormalDtype sets the optional dtype attribute to value. // -// N is the size of the segment being reduced. +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessTruncatedNormalDtype(value tf.DataType) StatelessTruncatedNormalAttr { + return func(m optionalAttr) { + m["dtype"] = value + } +} + +// Outputs deterministic pseudorandom values from a truncated normal distribution. // -// Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is -// misisng, the `output` tensor at that position will be zeroed. +// The generated values follow a normal distribution with mean 0 and standard +// deviation 1, except that values whose magnitude is more than 2 standard +// deviations from the mean are dropped and re-picked. // -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. +// The outputs are a deterministic function of `shape` and `seed`. // // Arguments: +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). // -// indices: A 1-D tensor. Has same rank as `segment_ids`. 
-// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. -// num_segments: Should equal the number of distinct segment IDs. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SparseSegmentSqrtNWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { +// Returns Random values with specified shape. +func StatelessTruncatedNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessTruncatedNormalAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtNWithNumSegments", + Type: "StatelessTruncatedNormal", Input: []tf.Input{ - data, indices, segment_ids, num_segments, + shape, seed, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes gradients for SparseSegmentSqrtN. +// RestoreSliceAttr is an optional argument to RestoreSlice. +type RestoreSliceAttr func(optionalAttr) + +// RestoreSlicePreferredShard sets the optional preferred_shard attribute to value. // -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. +// value: Index of file to open first if multiple files match +// `file_pattern`. See the documentation for `Restore`. +// If not specified, defaults to -1 +func RestoreSlicePreferredShard(value int64) RestoreSliceAttr { + return func(m optionalAttr) { + m["preferred_shard"] = value + } +} + +// Restores a tensor from checkpoint files. +// +// This is like `Restore` except that restored tensor can be listed as filling +// only a slice of a larger tensor. `shape_and_slice` specifies the shape of the +// larger tensor and the slice that the restored tensor covers. 
+// +// The `shape_and_slice` input has the same format as the +// elements of the `shapes_and_slices` input of the `SaveSlices` op. // // Arguments: -// grad: gradient propagated to the SparseSegmentSqrtN op. -// indices: indices passed to the corresponding SparseSegmentSqrtN op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentSqrtN op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentSqrtN op. -func SparseSegmentSqrtNGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { +// file_pattern: Must have a single element. The pattern of the files from +// which we read the tensor. +// tensor_name: Must have a single element. The name of the tensor to be +// restored. +// shape_and_slice: Scalar. The shapes and slice specifications to use when +// restoring a tensors. +// dt: The type of the tensor to be restored. +// +// Returns The restored tensor. +func RestoreSlice(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, shape_and_slice tf.Output, dt tf.DataType, optional ...RestoreSliceAttr) (tensor tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dt": dt} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SparseSegmentSqrtNGrad", + Type: "RestoreSlice", Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, + file_pattern, tensor_name, shape_and_slice, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// LRNGradAttr is an optional argument to LRNGrad. -type LRNGradAttr func(optionalAttr) - -// LRNGradDepthRadius sets the optional depth_radius attribute to value. +// Returns the element-wise sum of a list of tensors. // -// value: A depth radius. -// If not specified, defaults to 5 -func LRNGradDepthRadius(value int64) LRNGradAttr { - return func(m optionalAttr) { - m["depth_radius"] = value - } -} - -// LRNGradBias sets the optional bias attribute to value. 
+// `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not +// wait for all of its inputs to be ready before beginning to sum. This can +// save memory if inputs are ready at different times, since minimum temporary +// storage is proportional to the output size rather than the inputs size. // -// value: An offset (usually > 0 to avoid dividing by 0). -// If not specified, defaults to 1 -func LRNGradBias(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["bias"] = value - } -} - -// LRNGradAlpha sets the optional alpha attribute to value. +// Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. // -// value: A scale factor, usually positive. -// If not specified, defaults to 1 -func LRNGradAlpha(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["alpha"] = value - } -} - -// LRNGradBeta sets the optional beta attribute to value. +// Returns a `Tensor` of same shape and type as the elements of `inputs`. // -// value: An exponent. -// If not specified, defaults to 0.5 -func LRNGradBeta(value float32) LRNGradAttr { - return func(m optionalAttr) { - m["beta"] = value +// Arguments: +// inputs: A list of `Tensor` objects, each with same shape and type. +// shape: Shape of elements of `inputs`. +func AccumulateNV2(scope *Scope, inputs []tf.Output, shape tf.Shape) (sum tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"shape": shape} + opspec := tf.OpSpec{ + Type: "AccumulateNV2", + Input: []tf.Input{ + tf.OutputList(inputs), + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Gradients for Local Response Normalization. +// Convert the quantized 'input' tensor into a lower-precision 'output', using the +// +// actual distribution of the values to maximize the usage of the lower bit depth +// and adjusting the output min and max ranges accordingly. 
+// +// [input_min, input_max] are scalar floats that specify the range for the float +// interpretation of the 'input' data. For example, if input_min is -1.0f and +// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 +// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. +// +// This operator tries to squeeze as much precision as possible into an output with +// a lower bit depth by calculating the actual min and max values found in the +// data. For example, maybe that quint16 input has no values lower than 16,384 and +// none higher than 49,152. That means only half the range is actually needed, all +// the float interpretations are between -0.5f and 0.5f, so if we want to compress +// the data into a quint8 output, we can use that range rather than the theoretical +// -1.0f to 1.0f that is suggested by the input min and max. +// +// In practice, this is most useful for taking output from operations like +// QuantizedMatMul that can produce higher bit-depth outputs than their inputs and +// may have large potential output ranges, but in practice have a distribution of +// input values that only uses a small fraction of the possible range. By feeding +// that output into this operator, we can reduce it from 32 bits down to 8 with +// minimal loss of accuracy. // // Arguments: -// input_grads: 4-D with shape `[batch, height, width, channels]`. -// input_image: 4-D with shape `[batch, height, width, channels]`. -// output_image: 4-D with shape `[batch, height, width, channels]`. // -// Returns The gradients for LRN. -func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) { +// input_min: The float value that the minimum quantized input value represents. +// input_max: The float value that the maximum quantized input value represents. +// out_type: The type of the output. Should be a lower bit depth than Tinput. 
+// +// Returns The float value that the minimum quantized output value represents.The float value that the maximum quantized output value represents. +func QuantizeDownAndShrinkRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"out_type": out_type} opspec := tf.OpSpec{ - Type: "LRNGrad", + Type: "QuantizeDownAndShrinkRange", Input: []tf.Input{ - input_grads, input_image, output_image, + input, input_min, input_max, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// AnyAttr is an optional argument to Any. -type AnyAttr func(optionalAttr) +// RandomGammaAttr is an optional argument to RandomGamma. +type RandomGammaAttr func(optionalAttr) -// AnyKeepDims sets the optional keep_dims attribute to value. +// RandomGammaSeed sets the optional seed attribute to value. // -// value: If true, retain reduced dimensions with length 1. -// If not specified, defaults to false -func AnyKeepDims(value bool) AnyAttr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomGammaSeed(value int64) RandomGammaAttr { return func(m optionalAttr) { - m["keep_dims"] = value + m["seed"] = value } } -// Computes the "logical or" of elements across dimensions of a tensor. +// RandomGammaSeed2 sets the optional seed2 attribute to value. // -// Reduces `input` along the dimensions given in `axis`. Unless -// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in -// `axis`. If `keep_dims` is true, the reduced dimensions are -// retained with length 1. 
+// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomGammaSeed2(value int64) RandomGammaAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from the Gamma distribution(s) described by alpha. +// +// This op uses the algorithm by Marsaglia et al. to acquire samples via +// transformation-rejection from pairs of uniform and normal random variables. +// See http://dl.acm.org/citation.cfm?id=358414 // // Arguments: -// input: The tensor to reduce. -// axis: The dimensions to reduce. Must be in the range -// `[-rank(input), rank(input))`. +// shape: 1-D integer tensor. Shape of independent samples to draw from each +// distribution described by the shape parameters given in alpha. +// alpha: A tensor in which each scalar is a "shape" parameter describing the +// associated gamma distribution. // -// Returns The reduced tensor. -func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (output tf.Output) { +// Returns A tensor with shape `shape + shape(alpha)`. Each slice +// `[:, ..., :, i0, i1, ...iN]` contains the samples drawn for +// `alpha[i0, i1, ...iN]`. The dtype of the output matches the dtype of alpha. +func RandomGamma(scope *Scope, shape tf.Output, alpha tf.Output, optional ...RandomGammaAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -27026,9 +26946,9 @@ func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (ou a(attrs) } opspec := tf.OpSpec{ - Type: "Any", + Type: "RandomGamma", Input: []tf.Input{ - input, axis, + shape, alpha, }, Attrs: attrs, } @@ -27036,30 +26956,66 @@ func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (ou return op.Output(0) } -// DestroyResourceOpAttr is an optional argument to DestroyResourceOp. -type DestroyResourceOpAttr func(optionalAttr) +// ResourceScatterNdSubAttr is an optional argument to ResourceScatterNdSub. 
+type ResourceScatterNdSubAttr func(optionalAttr) -// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value. +// ResourceScatterNdSubUseLocking sets the optional use_locking attribute to value. // -// value: whether to ignore the error when the resource -// doesn't exist. +// value: An optional bool. Defaults to True. If True, the assignment will +// be protected by a lock; otherwise the behavior is undefined, +// but may exhibit less contention. // If not specified, defaults to true -func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr { +func ResourceScatterNdSubUseLocking(value bool) ResourceScatterNdSubAttr { return func(m optionalAttr) { - m["ignore_lookup_error"] = value + m["use_locking"] = value } } -// Deletes the resource specified by the handle. +// Applies sparse subtraction to individual values or slices in a Variable. // -// All subsequent operations using the resource will result in a NotFound -// error status. +// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. +// +// `indices` must be integer tensor, containing indices into `ref`. +// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. +// +// The innermost dimension of `indices` (with length `K`) corresponds to +// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th +// dimension of `ref`. +// +// `updates` is `Tensor` of rank `Q-1+P-K` with shape: +// +// ``` +// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]] +// ``` +// +// For example, say we want to subtract 4 scattered elements from a rank-1 tensor +// with 8 elements. 
In Python, that subtraction would look like this: +// +// ```python +// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True) +// indices = tf.constant([[4], [3], [1], [7]]) +// updates = tf.constant([9, 10, 11, 12]) +// sub = tf.scatter_nd_sub(ref, indices, updates) +// with tf.Session() as sess: +// print sess.run(sub) +// ``` +// +// The resulting update to ref would look like this: +// +// [1, -9, 3, -6, -4, 6, 7, -4] +// +// See `tf.scatter_nd` for more details about how to make updates to +// slices. // // Arguments: -// resource: handle to the resource to delete. +// ref: A resource handle. Must be from a VarHandleOp. +// indices: A Tensor. Must be one of the following types: int32, int64. +// A tensor of indices into ref. +// updates: A Tensor. Must have the same type as ref. A tensor of +// values to add to ref. // // Returns the created operation. -func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) { +func ResourceScatterNdSub(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdSubAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -27068,129 +27024,143 @@ func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyReso a(attrs) } opspec := tf.OpSpec{ - Type: "DestroyResourceOp", + Type: "ResourceScatterNdSub", Input: []tf.Input{ - resource, + ref, indices, updates, }, Attrs: attrs, } return scope.AddOperation(opspec) } -// Generates values in an interval. -// -// A sequence of `num` evenly-spaced values are generated beginning at `start`. -// If `num > 1`, the values in the sequence increase by `stop - start / num - 1`, -// so that the last one is exactly `stop`. +// Outputs deterministic pseudorandom random integers from a uniform distribution. // -// For example: +// The generated values follow a uniform distribution in the range `[minval, maxval)`. 
// -// ``` -// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] -// ``` +// The outputs are a deterministic function of `shape`, `seed`, `minval`, and `maxval`. // // Arguments: -// start: 0-D tensor. First entry in the range. -// stop: 0-D tensor. Last entry in the range. -// num: 0-D tensor. Number of values to generate. +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). +// minval: Minimum value (inclusive, scalar). +// maxval: Maximum value (exclusive, scalar). // -// Returns 1-D. The generated values. -func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) { +// Returns Random values with specified shape. +func StatelessRandomUniformInt(scope *Scope, shape tf.Output, seed tf.Output, minval tf.Output, maxval tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LinSpace", + Type: "StatelessRandomUniformInt", Input: []tf.Input{ - start, stop, num, + shape, seed, minval, maxval, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ComplexAttr is an optional argument to Complex. -type ComplexAttr func(optionalAttr) +// QuantizedConv2DAttr is an optional argument to QuantizedConv2D. +type QuantizedConv2DAttr func(optionalAttr) -// ComplexTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_COMPLEX64 -func ComplexTout(value tf.DataType) ComplexAttr { +// QuantizedConv2DOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedConv2DOutType(value tf.DataType) QuantizedConv2DAttr { return func(m optionalAttr) { - m["Tout"] = value + m["out_type"] = value } } -// Converts two real numbers to a complex number. +// QuantizedConv2DDilations sets the optional dilations attribute to value. 
// -// Given a tensor `real` representing the real part of a complex number, and a -// tensor `imag` representing the imaginary part of a complex number, this -// operation returns complex numbers elementwise of the form \\(a + bj\\), where -// *a* represents the `real` part and *b* represents the `imag` part. +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func QuantizedConv2DDilations(value []int64) QuantizedConv2DAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes a 2D convolution given quantized 4D input and filter tensors. // -// The input tensors `real` and `imag` must have the same shape. +// The inputs are quantized tensors where the lowest value represents the real +// number of the associated minimum, and the highest represents the maximum. +// This means that you can only interpret the quantized output in the same way, by +// taking the returned minimum and maximum values into account. // -// For example: +// Arguments: // -// ``` -// # tensor 'real' is [2.25, 3.25] -// # tensor `imag` is [4.75, 5.75] -// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] -// ``` -func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { +// filter: filter's input_depth dimension must match input's depth dimensions. +// min_input: The float value that the lowest quantized input value represents. +// max_input: The float value that the highest quantized input value represents. +// min_filter: The float value that the lowest quantized filter value represents. +// max_filter: The float value that the highest quantized filter value represents. 
+// strides: The stride of the sliding window for each dimension of the input +// tensor. +// padding: The type of padding algorithm to use. +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedConv2D(scope *Scope, input tf.Output, filter tf.Output, min_input tf.Output, max_input tf.Output, min_filter tf.Output, max_filter tf.Output, strides []int64, padding string, optional ...QuantizedConv2DAttr) (output tf.Output, min_output tf.Output, max_output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Complex", + Type: "QuantizedConv2D", Input: []tf.Input{ - real, imag, + input, filter, min_input, max_input, min_filter, max_filter, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// ImagAttr is an optional argument to Imag. -type ImagAttr func(optionalAttr) +// ResourceGatherAttr is an optional argument to ResourceGather. +type ResourceGatherAttr func(optionalAttr) -// ImagTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func ImagTout(value tf.DataType) ImagAttr { +// ResourceGatherValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func ResourceGatherValidateIndices(value bool) ResourceGatherAttr { return func(m optionalAttr) { - m["Tout"] = value + m["validate_indices"] = value } } -// Returns the imaginary part of a complex number. +// Gather slices from the variable pointed to by `resource` according to `indices`. // -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the imaginary part of each element in `input`. 
All -// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part returned by this operation. +// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). +// Produces an output tensor with shape `indices.shape + params.shape[1:]` where: // -// For example: +// ```python +// # Scalar indices +// output[:, ..., :] = params[indices, :, ... :] // +// # Vector indices +// output[i, :, ..., :] = params[indices[i], :, ... :] +// +// # Higher rank indices +// output[i, ..., j, :, ... :] = params[indices[i, ..., j], :, ..., :] // ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.imag(input) ==> [4.75, 5.75] -// ``` -func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { +func ResourceGather(scope *Scope, resource tf.Output, indices tf.Output, dtype tf.DataType, optional ...ResourceGatherAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtype": dtype} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Imag", + Type: "ResourceGather", Input: []tf.Input{ - input, + resource, indices, }, Attrs: attrs, } @@ -27198,128 +27168,123 @@ func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output return op.Output(0) } -// Computes hyperbolic tangent of `x` element-wise. -func Tanh(scope *Scope, x tf.Output) (y tf.Output) { +// StatelessMultinomialAttr is an optional argument to StatelessMultinomial. +type StatelessMultinomialAttr func(optionalAttr) + +// StatelessMultinomialOutputDtype sets the optional output_dtype attribute to value. +// If not specified, defaults to DT_INT64 +func StatelessMultinomialOutputDtype(value tf.DataType) StatelessMultinomialAttr { + return func(m optionalAttr) { + m["output_dtype"] = value + } +} + +// Draws samples from a multinomial distribution. 
+// +// Arguments: +// logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice `[i, :]` +// represents the unnormalized log probabilities for all classes. +// num_samples: 0-D. Number of independent samples to draw for each row slice. +// seed: 2 seeds (shape [2]). +// +// Returns 2-D Tensor with shape `[batch_size, num_samples]`. Each slice `[i, :]` +// contains the drawn class labels with range `[0, num_classes)`. +func StatelessMultinomial(scope *Scope, logits tf.Output, num_samples tf.Output, seed tf.Output, optional ...StatelessMultinomialAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Tanh", + Type: "StatelessMultinomial", Input: []tf.Input{ - x, + logits, num_samples, seed, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the maximum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such -// that `segment_ids[j] == i`. -// -// If the max is empty for a given segment ID `i`, `output[i] = 0`. +// Returns a batched matrix tensor with new batched diagonal values. // -//
-// -//
+// Given `input` and `diagonal`, this operation returns a tensor with the +// same shape and values as `input`, except for the main diagonal of the +// innermost matrices. These will be overwritten by the values in `diagonal`. // -// For example: +// The output is computed as follows: // -// ``` -// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) -// tf.segment_max(c, tf.constant([0, 0, 1])) -// # ==> [[4, 3, 3, 4], -// # [5, 6, 7, 8]] -// ``` +// Assume `input` has `k+1` dimensions `[I, J, K, ..., M, N]` and `diagonal` has +// `k` dimensions `[I, J, K, ..., min(M, N)]`. Then the output is a +// tensor of rank `k+1` with dimensions `[I, J, K, ..., M, N]` where: // +// * `output[i, j, k, ..., m, n] = diagonal[i, j, k, ..., n]` for `m == n`. +// * `output[i, j, k, ..., m, n] = input[i, j, k, ..., m, n]` for `m != n`. // // Arguments: +// input: Rank `k+1`, where `k >= 1`. +// diagonal: Rank `k`, where `k >= 1`. // -// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// Returns Rank `k+1`, with `output.shape = input.shape`. +func MatrixSetDiag(scope *Scope, input tf.Output, diagonal tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SegmentMax", + Type: "MatrixSetDiag", Input: []tf.Input{ - data, segment_ids, + input, diagonal, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that skips `count` elements from the `input_dataset`. -// -// Arguments: +// Returns the element-wise max of two SparseTensors. // -// count: A scalar representing the number of elements from the `input_dataset` -// that should be skipped. If count is -1, skips everything. 
+// Assumes the two SparseTensors have the same shape, i.e., no broadcasting. // +// Arguments: +// a_indices: 2-D. `N x R` matrix with the indices of non-empty values in a +// SparseTensor, in the canonical lexicographic ordering. +// a_values: 1-D. `N` non-empty values corresponding to `a_indices`. +// a_shape: 1-D. Shape of the input SparseTensor. +// b_indices: counterpart to `a_indices` for the other operand. +// b_values: counterpart to `a_values` for the other operand; must be of the same dtype. +// b_shape: counterpart to `a_shape` for the other operand; the two shapes must be equal. // -func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns 2-D. The indices of the output SparseTensor.1-D. The values of the output SparseTensor. +func SparseSparseMaximum(scope *Scope, a_indices tf.Output, a_values tf.Output, a_shape tf.Output, b_indices tf.Output, b_values tf.Output, b_shape tf.Output) (output_indices tf.Output, output_values tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "SkipDataset", + Type: "SparseSparseMaximum", Input: []tf.Input{ - input_dataset, count, + a_indices, a_values, a_shape, b_indices, b_values, b_shape, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// RealAttr is an optional argument to Real. -type RealAttr func(optionalAttr) - -// RealTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func RealTout(value tf.DataType) RealAttr { - return func(m optionalAttr) { - m["Tout"] = value - } -} - -// Returns the real part of a complex number. -// -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the real part of each element in `input`. 
All elements in -// `input` must be complex numbers of the form \\(a + bj\\), where *a* is the real -// part returned by this operation and *b* is the imaginary part. -// -// For example: +// List of the given size with empty elements. // -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.real(input) ==> [-2.25, 3.25] -// ``` -func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output) { +// element_shape: the shape of the future elements of the list +// num_elements: the number of elements to reserve +// handle: the output list +// element_dtype: the desired type of elements in the list. +func TensorListReserve(scope *Scope, element_shape tf.Output, num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"element_dtype": element_dtype} opspec := tf.OpSpec{ - Type: "Real", + Type: "TensorListReserve", Input: []tf.Input{ - input, + element_shape, num_elements, }, Attrs: attrs, } @@ -27327,164 +27292,198 @@ func Real(scope *Scope, input tf.Output, optional ...RealAttr) (output tf.Output return op.Output(0) } -// Sends `input` to all devices that are connected to the output. -// -// Sends `input` to all devices that are connected to the output. -// -// The graph should be constructed so that all ops connected to the output have a -// valid device assignment, and the op itself is assigned one of these devices. -// -// input: The input to the broadcast. -// output: The same as input. -// shape: The shape of the input tensor. +// LoadTPUEmbeddingMDLAdagradLightParametersAttr is an optional argument to LoadTPUEmbeddingMDLAdagradLightParameters. +type LoadTPUEmbeddingMDLAdagradLightParametersAttr func(optionalAttr) + +// LoadTPUEmbeddingMDLAdagradLightParametersTableId sets the optional table_id attribute to value. 
+// If not specified, defaults to -1 // -func NcclBroadcast(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"shape": shape} - opspec := tf.OpSpec{ - Type: "NcclBroadcast", - Input: []tf.Input{ - input, - }, - Attrs: attrs, +// REQUIRES: value >= -1 +func LoadTPUEmbeddingMDLAdagradLightParametersTableId(value int64) LoadTPUEmbeddingMDLAdagradLightParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// ResizeAreaAttr is an optional argument to ResizeArea. -type ResizeAreaAttr func(optionalAttr) - -// ResizeAreaAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func ResizeAreaAlignCorners(value bool) ResizeAreaAttr { +// LoadTPUEmbeddingMDLAdagradLightParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingMDLAdagradLightParametersTableName(value string) LoadTPUEmbeddingMDLAdagradLightParametersAttr { return func(m optionalAttr) { - m["align_corners"] = value + m["table_name"] = value } } -// Resize `images` to `size` using area interpolation. +// Load MDL Adagrad Light embedding parameters. // -// Input images can be of different types but output images are always float. +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. 
// -// The range of pixel values for the output image might be slightly different -// from the range for the input image because of limited numerical precision. -// To guarantee an output range, for example `[0.0, 1.0]`, apply -// `tf.clip_by_value` to the output. +// Arguments: +// parameters: Value of parameters used in the MDL Adagrad Light optimization algorithm. +// accumulators: Value of accumulators used in the MDL Adagrad Light optimization algorithm. +// weights: Value of weights used in the MDL Adagrad Light optimization algorithm. +// benefits: Value of benefits used in the MDL Adagrad Light optimization algorithm. // -// Each output pixel is computed by first transforming the pixel's footprint into -// the input tensor and then averaging the pixels that intersect the footprint. An -// input pixel's contribution to the average is weighted by the fraction of its -// area that intersects the footprint. This is the same as OpenCV's INTER_AREA. // -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) { +// Returns the created operation. 
+func LoadTPUEmbeddingMDLAdagradLightParameters(scope *Scope, parameters tf.Output, accumulators tf.Output, weights tf.Output, benefits tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMDLAdagradLightParametersAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResizeArea", + Type: "LoadTPUEmbeddingMDLAdagradLightParameters", Input: []tf.Input{ - images, size, + parameters, accumulators, weights, benefits, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// VarHandleOpAttr is an optional argument to VarHandleOp. -type VarHandleOpAttr func(optionalAttr) - -// VarHandleOpContainer sets the optional container attribute to value. +// Computes the gradient for the inverse of `x` wrt its input. // -// value: the container this variable is placed in. -// If not specified, defaults to "" -func VarHandleOpContainer(value string) VarHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value +// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` +// is the corresponding input gradient. +func InvGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "InvGrad", + Input: []tf.Input{ + y, dy, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// VarHandleOpSharedName sets the optional shared_name attribute to value. +// Reduces sparse updates into the variable referenced by `resource` using the `min` operation. // -// value: the name by which this variable is referred to. 
-// If not specified, defaults to "" -func VarHandleOpSharedName(value string) VarHandleOpAttr { - return func(m optionalAttr) { - m["shared_name"] = value +// This operation computes +// +// # Scalar indices +// ref[indices, ...] = min(ref[indices, ...], updates[...]) +// +// # Vector indices (for each i) +// ref[indices[i], ...] = min(ref[indices[i], ...], updates[i, ...]) +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] = min(ref[indices[i, ..., j], ...], updates[i, ..., j, ...]) +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions are combined. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
+// +// Arguments: +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. +// +// Returns the created operation. +func ResourceScatterMin(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ResourceScatterMin", + Input: []tf.Input{ + resource, indices, updates, + }, } + return scope.AddOperation(opspec) } -// Creates a handle to a Variable resource. +// Elementwise computes the bitwise OR of `x` and `y`. // -// Arguments: -// dtype: the type of this variable. Must agree with the dtypes -// of all ops using this variable. -// shape: The (possibly partially specified) shape of this variable. -func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) { +// The result will have those bits set, that are set in `x`, `y` or both. The +// computation is performed on the underlying representations of `x` and `y`. +func BitwiseOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "VarHandleOp", - - Attrs: attrs, + Type: "BitwiseOr", + Input: []tf.Input{ + x, y, + }, } op := scope.AddOperation(opspec) return op.Output(0) } -// AngleAttr is an optional argument to Angle. -type AngleAttr func(optionalAttr) +// MatrixSolveLsAttr is an optional argument to MatrixSolveLs. +type MatrixSolveLsAttr func(optionalAttr) -// AngleTout sets the optional Tout attribute to value. -// If not specified, defaults to DT_FLOAT -func AngleTout(value tf.DataType) AngleAttr { +// MatrixSolveLsFast sets the optional fast attribute to value. 
+// If not specified, defaults to true +func MatrixSolveLsFast(value bool) MatrixSolveLsAttr { return func(m optionalAttr) { - m["Tout"] = value + m["fast"] = value } } -// Returns the argument of a complex number. +// Solves one or more linear least-squares problems. // -// Given a tensor `input` of complex numbers, this operation returns a tensor of -// type `float` that is the argument of each element in `input`. All elements in -// `input` must be complex numbers of the form \\(a + bj\\), where *a* -// is the real part and *b* is the imaginary part. +// `matrix` is a tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form real or complex matrices of size `[M, N]`. `Rhs` is a tensor of the same +// type as `matrix` and shape `[..., M, K]`. +// The output is a tensor shape `[..., N, K]` where each output matrix solves +// each of the equations +// `matrix[..., :, :]` * `output[..., :, :]` = `rhs[..., :, :]` +// in the least squares sense. // -// The argument returned by this operation is of the form \\(atan2(b, a)\\). +// We use the following notation for (complex) matrix and right-hand sides +// in the batch: // -// For example: +// `matrix`=\\(A \in \mathbb{C}^{m \times n}\\), +// `rhs`=\\(B \in \mathbb{C}^{m \times k}\\), +// `output`=\\(X \in \mathbb{C}^{n \times k}\\), +// `l2_regularizer`=\\(\lambda \in \mathbb{R}\\). // -// ``` -// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] -// tf.angle(input) ==> [2.0132, 1.056] -// ``` +// If `fast` is `True`, then the solution is computed by solving the normal +// equations using Cholesky decomposition. Specifically, if \\(m \ge n\\) then +// \\(X = (A^H A + \lambda I)^{-1} A^H B\\), which solves the least-squares +// problem \\(X = \mathrm{argmin}_{Z \in \Re^{n \times k} } ||A Z - B||_F^2 + \lambda ||Z||_F^2\\). 
+// If \\(m \lt n\\) then `output` is computed as +// \\(X = A^H (A A^H + \lambda I)^{-1} B\\), which (for \\(\lambda = 0\\)) is the +// minimum-norm solution to the under-determined linear system, i.e. +// \\(X = \mathrm{argmin}_{Z \in \mathbb{C}^{n \times k} } ||Z||_F^2 \\), +// subject to \\(A Z = B\\). Notice that the fast path is only numerically stable +// when \\(A\\) is numerically full rank and has a condition number +// \\(\mathrm{cond}(A) \lt \frac{1}{\sqrt{\epsilon_{mach} } }\\) or \\(\lambda\\) is +// sufficiently large. +// +// If `fast` is `False` an algorithm based on the numerically robust complete +// orthogonal decomposition is used. This computes the minimum-norm +// least-squares solution, even when \\(A\\) is rank deficient. This path is +// typically 6-7 times slower than the fast path. If `fast` is `False` then +// `l2_regularizer` is ignored. +// +// Arguments: +// matrix: Shape is `[..., M, N]`. +// rhs: Shape is `[..., M, K]`. +// l2_regularizer: Scalar tensor. // // @compatibility(numpy) -// Equivalent to np.angle. +// Equivalent to np.linalg.lstsq // @end_compatibility -func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Output) { +// +// Returns Shape is `[..., N, K]`. +func MatrixSolveLs(scope *Scope, matrix tf.Output, rhs tf.Output, l2_regularizer tf.Output, optional ...MatrixSolveLsAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -27493,9 +27492,9 @@ func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Outp a(attrs) } opspec := tf.OpSpec{ - Type: "Angle", + Type: "MatrixSolveLs", Input: []tf.Input{ - input, + matrix, rhs, l2_regularizer, }, Attrs: attrs, } @@ -27503,140 +27502,204 @@ func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Outp return op.Output(0) } -// Clips tensor values to a specified min and max. +// Interleave the values from the `data` tensors into a single tensor. 
// -// Given a tensor `t`, this operation returns a tensor of the same type and -// shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`. -// Any values less than `clip_value_min` are set to `clip_value_min`. Any values -// greater than `clip_value_max` are set to `clip_value_max`. +// Builds a merged tensor such that // -// Arguments: -// t: A `Tensor`. -// clip_value_min: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape -// as `t`. The minimum value to clip by. -// clip_value_max: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape -// as `t`. The maximum value to clip by. +// ```python +// merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...] +// ``` // -// Returns A clipped `Tensor` with the same shape as input 't'. -func ClipByValue(scope *Scope, t tf.Output, clip_value_min tf.Output, clip_value_max tf.Output) (output tf.Output) { +// For example, if each `indices[m]` is scalar or vector, we have +// +// ```python +// # Scalar indices: +// merged[indices[m], ...] = data[m][...] +// +// # Vector indices: +// merged[indices[m][i], ...] = data[m][i, ...] +// ``` +// +// Each `data[i].shape` must start with the corresponding `indices[i].shape`, +// and the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we +// must have `data[i].shape = indices[i].shape + constant`. In terms of this +// `constant`, the output shape is +// +// merged.shape = [max(indices)] + constant +// +// Values are merged in order, so if an index appears in both `indices[m][i]` and +// `indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the +// merged result. If you do not need this guarantee, ParallelDynamicStitch might +// perform better on some devices. 
+// +// For example: +// +// ```python +// indices[0] = 6 +// indices[1] = [4, 1] +// indices[2] = [[5, 2], [0, 3]] +// data[0] = [61, 62] +// data[1] = [[41, 42], [11, 12]] +// data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]] +// merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42], +// [51, 52], [61, 62]] +// ``` +// +// This method can be used to merge partitions created by `dynamic_partition` +// as illustrated on the following example: +// +// ```python +// # Apply function (increments x_i) on elements for which a certain condition +// # apply (x_i != -1 in this example). +// x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4]) +// condition_mask=tf.not_equal(x,tf.constant(-1.)) +// partitioned_data = tf.dynamic_partition( +// x, tf.cast(condition_mask, tf.int32) , 2) +// partitioned_data[1] = partitioned_data[1] + 1.0 +// condition_indices = tf.dynamic_partition( +// tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2) +// x = tf.dynamic_stitch(condition_indices, partitioned_data) +// # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain +// # unchanged. +// ``` +// +//
+// +//
+func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ClipByValue", + Type: "DynamicStitch", Input: []tf.Input{ - t, clip_value_min, clip_value_max, + tf.OutputList(indices), tf.OutputList(data), }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Counts the number of occurrences of each value in an integer array. -// -// Outputs a vector with length `size` and the same dtype as `weights`. If -// `weights` are empty, then index `i` stores the number of times the value `i` is -// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of -// the value in `weights` at each index where the corresponding value in `arr` is -// `i`. +// Performs a padding as a preprocess during a convolution. // -// Values in `arr` outside of the range [0, size) are ignored. +// Similar to FusedResizeAndPadConv2d, this op allows for an optimized +// implementation where the spatial padding transformation stage is fused with the +// im2col lookup, but in this case without the bilinear filtering required for +// resizing. Fusing the padding prevents the need to write out the intermediate +// results as whole tensors, reducing memory pressure, and we can get some latency +// gains by merging the transformation calculations. +// The data_format attribute for Conv2D isn't supported by this op, and 'NHWC' +// order is used instead. +// Internally this op uses a single per-graph scratch buffer, which means that it +// will block if multiple versions are being run in parallel. This is because this +// operator is primarily an optimization to minimize memory usage. // // Arguments: -// arr: int32 `Tensor`. -// size: non-negative int32 scalar `Tensor`. -// weights: is an int32, int64, float32, or float64 `Tensor` with the same -// shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights -// equal to 1. 
+// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. +// paddings: A two-column matrix specifying the padding sizes. The number of +// rows must be the same as the rank of `input`. +// filter: 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. // -// Returns 1D `Tensor` with length equal to `size`. The counts or summed weights for -// each value in the range [0, size). -func Bincount(scope *Scope, arr tf.Output, size tf.Output, weights tf.Output) (bins tf.Output) { +// strides: 1-D of length 4. The stride of the sliding window for each dimension +// of `input`. Must be in the same order as the dimension specified with format. +// padding: The type of padding algorithm to use. +func FusedPadConv2D(scope *Scope, input tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} opspec := tf.OpSpec{ - Type: "Bincount", + Type: "FusedPadConv2D", Input: []tf.Input{ - arr, size, weights, + input, paddings, filter, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// CumsumAttr is an optional argument to Cumsum. -type CumsumAttr func(optionalAttr) +// Conv2DBackpropInputAttr is an optional argument to Conv2DBackpropInput. +type Conv2DBackpropInputAttr func(optionalAttr) -// CumsumExclusive sets the optional exclusive attribute to value. -// -// value: If `True`, perform exclusive cumsum. -// If not specified, defaults to false -func CumsumExclusive(value bool) CumsumAttr { +// Conv2DBackpropInputUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value. 
+// If not specified, defaults to true +func Conv2DBackpropInputUseCudnnOnGpu(value bool) Conv2DBackpropInputAttr { return func(m optionalAttr) { - m["exclusive"] = value + m["use_cudnn_on_gpu"] = value } } -// CumsumReverse sets the optional reverse attribute to value. +// Conv2DBackpropInputExplicitPaddings sets the optional explicit_paddings attribute to value. // -// value: A `bool` (default: False). -// If not specified, defaults to false -func CumsumReverse(value bool) CumsumAttr { +// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. For the ith +// dimension, the amount of padding inserted before and after the dimension is +// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If +// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty. +// If not specified, defaults to <> +func Conv2DBackpropInputExplicitPaddings(value []int64) Conv2DBackpropInputAttr { return func(m optionalAttr) { - m["reverse"] = value + m["explicit_paddings"] = value } } -// Compute the cumulative sum of the tensor `x` along `axis`. -// -// By default, this op performs an inclusive cumsum, which means that the first -// element of the input is identical to the first element of the output: -// -// ```python -// tf.cumsum([a, b, c]) # => [a, a + b, a + b + c] -// ``` -// -// By setting the `exclusive` kwarg to `True`, an exclusive cumsum is -// performed instead: -// -// ```python -// tf.cumsum([a, b, c], exclusive=True) # => [0, a, a + b] -// ``` -// -// By setting the `reverse` kwarg to `True`, the cumsum is performed in the -// opposite direction: -// -// ```python -// tf.cumsum([a, b, c], reverse=True) # => [a + b + c, b + c, c] -// ``` -// -// This is more efficient than using separate `tf.reverse` ops. +// Conv2DBackpropInputDataFormat sets the optional data_format attribute to value. // -// The `reverse` and `exclusive` kwargs can also be combined: +// value: Specify the data format of the input and output data. 
With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func Conv2DBackpropInputDataFormat(value string) Conv2DBackpropInputAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Conv2DBackpropInputDilations sets the optional dilations attribute to value. // -// ```python -// tf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0] -// ``` +// value: 1-D tensor of length 4. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each filter +// element on that dimension. The dimension order is determined by the value of +// `data_format`, see above for details. Dilations in the batch and depth +// dimensions must be 1. +// If not specified, defaults to +func Conv2DBackpropInputDilations(value []int64) Conv2DBackpropInputAttr { + return func(m optionalAttr) { + m["dilations"] = value + } +} + +// Computes the gradients of convolution with respect to the input. // // Arguments: -// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, -// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, -// `complex128`, `qint8`, `quint8`, `qint32`, `half`. -// axis: A `Tensor` of type `int32` (default: 0). Must be in the range -// `[-rank(x), rank(x))`. -func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) (out tf.Output) { +// input_sizes: An integer vector representing the shape of `input`, +// where `input` is a 4-D `[batch, height, width, channels]` tensor. +// filter: 4-D with shape +// `[filter_height, filter_width, in_channels, out_channels]`. +// out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`. +// Gradients w.r.t. the output of the convolution. 
+// strides: The stride of the sliding window for each dimension of the input +// of the convolution. Must be in the same order as the dimension specified with +// format. +// padding: The type of padding algorithm to use. +// +// Returns 4-D with shape `[batch, in_height, in_width, in_channels]`. Gradient +// w.r.t. the input of the convolution. +func Conv2DBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropInputAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Cumsum", + Type: "Conv2DBackpropInput", Input: []tf.Input{ - x, axis, + input_sizes, filter, out_backprop, }, Attrs: attrs, } @@ -27644,136 +27707,118 @@ func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) ( return op.Output(0) } -// Return the shape of s0 op s1 with broadcast. +// Creates a dataset that executes a SQL query and emits rows of the result set. // -// Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the -// broadcasted shape. `s0`, `s1` and `r0` are all integer vectors. -func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { +// Arguments: +// driver_name: The database type. Currently, the only supported type is 'sqlite'. +// data_source_name: A connection string to connect to the database. +// query: A SQL query to execute. 
+// +// +func ExperimentalSqlDataset(scope *Scope, driver_name tf.Output, data_source_name tf.Output, query tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "BroadcastArgs", + Type: "ExperimentalSqlDataset", Input: []tf.Input{ - s0, s1, + driver_name, data_source_name, query, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// DataFormatDimMapAttr is an optional argument to DataFormatDimMap. -type DataFormatDimMapAttr func(optionalAttr) +// LoadTPUEmbeddingCenteredRMSPropParametersAttr is an optional argument to LoadTPUEmbeddingCenteredRMSPropParameters. +type LoadTPUEmbeddingCenteredRMSPropParametersAttr func(optionalAttr) -// DataFormatDimMapSrcFormat sets the optional src_format attribute to value. +// LoadTPUEmbeddingCenteredRMSPropParametersTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: source data format. -// If not specified, defaults to "NHWC" -func DataFormatDimMapSrcFormat(value string) DataFormatDimMapAttr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingCenteredRMSPropParametersTableId(value int64) LoadTPUEmbeddingCenteredRMSPropParametersAttr { return func(m optionalAttr) { - m["src_format"] = value + m["table_id"] = value } } -// DataFormatDimMapDstFormat sets the optional dst_format attribute to value. -// -// value: destination data format. -// If not specified, defaults to "NCHW" -func DataFormatDimMapDstFormat(value string) DataFormatDimMapAttr { +// LoadTPUEmbeddingCenteredRMSPropParametersTableName sets the optional table_name attribute to value. 
+// If not specified, defaults to "" +func LoadTPUEmbeddingCenteredRMSPropParametersTableName(value string) LoadTPUEmbeddingCenteredRMSPropParametersAttr { return func(m optionalAttr) { - m["dst_format"] = value + m["table_name"] = value } } -// Returns the dimension index in the destination data format given the one in +// Load centered RMSProp embedding parameters. // -// the source data format. +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // // Arguments: -// x: A Tensor with each element as a dimension index in source data format. -// Must be in the range [-4, 4). +// parameters: Value of parameters used in the centered RMSProp optimization algorithm. +// ms: Value of ms used in the centered RMSProp optimization algorithm. +// mom: Value of mom used in the centered RMSProp optimization algorithm. +// mg: Value of mg used in the centered RMSProp optimization algorithm. // -// Returns A Tensor with each element as a dimension index in destination data format. -func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAttr) (y tf.Output) { +// +// +// Returns the created operation. 
+func LoadTPUEmbeddingCenteredRMSPropParameters(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, mg tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingCenteredRMSPropParametersAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DataFormatDimMap", + Type: "LoadTPUEmbeddingCenteredRMSPropParameters", Input: []tf.Input{ - x, + parameters, ms, mom, mg, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// CumprodAttr is an optional argument to Cumprod. -type CumprodAttr func(optionalAttr) +// DataFormatVecPermuteAttr is an optional argument to DataFormatVecPermute. +type DataFormatVecPermuteAttr func(optionalAttr) -// CumprodExclusive sets the optional exclusive attribute to value. +// DataFormatVecPermuteSrcFormat sets the optional src_format attribute to value. // -// value: If `True`, perform exclusive cumprod. -// If not specified, defaults to false -func CumprodExclusive(value bool) CumprodAttr { +// value: source data format. +// If not specified, defaults to "NHWC" +func DataFormatVecPermuteSrcFormat(value string) DataFormatVecPermuteAttr { return func(m optionalAttr) { - m["exclusive"] = value + m["src_format"] = value } } -// CumprodReverse sets the optional reverse attribute to value. +// DataFormatVecPermuteDstFormat sets the optional dst_format attribute to value. // -// value: A `bool` (default: False). -// If not specified, defaults to false -func CumprodReverse(value bool) CumprodAttr { +// value: destination data format. 
+// If not specified, defaults to "NCHW" +func DataFormatVecPermuteDstFormat(value string) DataFormatVecPermuteAttr { return func(m optionalAttr) { - m["reverse"] = value + m["dst_format"] = value } } -// Compute the cumulative product of the tensor `x` along `axis`. -// -// By default, this op performs an inclusive cumprod, which means that the first -// element of the input is identical to the first element of the output: -// -// ```python -// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] -// ``` -// -// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is -// performed instead: -// -// ```python -// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] -// ``` -// -// By setting the `reverse` kwarg to `True`, the cumprod is performed in the -// opposite direction: -// -// ```python -// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] -// ``` -// -// This is more efficient than using separate `tf.reverse` ops. -// -// The `reverse` and `exclusive` kwargs can also be combined: +// Returns the permuted vector/tensor in the destination data format given the // -// ```python -// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] -// ``` +// one in the source data format. // // Arguments: -// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, -// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, -// `complex128`, `qint8`, `quint8`, `qint32`, `half`. -// axis: A `Tensor` of type `int32` (default: 0). Must be in the range -// `[-rank(x), rank(x))`. -func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { +// x: Vector of size 4 or Tensor of shape (4, 2) in source data format. +// +// Returns Vector of size 4 or Tensor of shape (4, 2) in destination data format. 
+func DataFormatVecPermute(scope *Scope, x tf.Output, optional ...DataFormatVecPermuteAttr) (y tf.Output) { if scope.Err() != nil { return } @@ -27782,9 +27827,9 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "Cumprod", + Type: "DataFormatVecPermute", Input: []tf.Input{ - x, axis, + x, }, Attrs: attrs, } @@ -27792,109 +27837,60 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) return op.Output(0) } -// QuantizedMatMulAttr is an optional argument to QuantizedMatMul. -type QuantizedMatMulAttr func(optionalAttr) - -// QuantizedMatMulToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedMatMulToutput(value tf.DataType) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } -} - -// QuantizedMatMulTransposeA sets the optional transpose_a attribute to value. -// -// value: If true, `a` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulTransposeA(value bool) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["transpose_a"] = value - } -} - -// QuantizedMatMulTransposeB sets the optional transpose_b attribute to value. -// -// value: If true, `b` is transposed before multiplication. -// If not specified, defaults to false -func QuantizedMatMulTransposeB(value bool) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["transpose_b"] = value - } -} - -// QuantizedMatMulTactivation sets the optional Tactivation attribute to value. -// -// value: The type of output produced by activation function -// following this operation. -// If not specified, defaults to DT_QUINT8 -func QuantizedMatMulTactivation(value tf.DataType) QuantizedMatMulAttr { - return func(m optionalAttr) { - m["Tactivation"] = value - } -} - -// Perform a quantized matrix multiplication of `a` by the matrix `b`. 
-// -// The inputs must be two-dimensional matrices and the inner dimension of -// `a` (after being transposed if `transpose_a` is non-zero) must match the -// outer dimension of `b` (after being transposed if `transposed_b` is -// non-zero). -// -// Arguments: -// a: Must be a two-dimensional tensor. -// b: Must be a two-dimensional tensor. -// min_a: The float value that the lowest quantized `a` value represents. -// max_a: The float value that the highest quantized `a` value represents. -// min_b: The float value that the lowest quantized `b` value represents. -// max_b: The float value that the highest quantized `b` value represents. +// Returns x / y element-wise. // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. -func QuantizedMatMul(scope *Scope, a tf.Output, b tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { +// *NOTE*: `Div` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Div(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "QuantizedMatMul", + Type: "Div", Input: []tf.Input{ - a, b, min_a, max_a, min_b, max_b, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// QuantizedMulAttr is an optional argument to QuantizedMul. -type QuantizedMulAttr func(optionalAttr) +// ResizeAreaAttr is an optional argument to ResizeArea. +type ResizeAreaAttr func(optionalAttr) -// QuantizedMulToutput sets the optional Toutput attribute to value. 
-// If not specified, defaults to DT_QINT32 -func QuantizedMulToutput(value tf.DataType) QuantizedMulAttr { +// ResizeAreaAlignCorners sets the optional align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func ResizeAreaAlignCorners(value bool) ResizeAreaAttr { return func(m optionalAttr) { - m["Toutput"] = value + m["align_corners"] = value } } -// Returns x * y element-wise, working on quantized buffers. +// Resize `images` to `size` using area interpolation. // -// Arguments: +// Input images can be of different types but output images are always float. // +// The range of pixel values for the output image might be slightly different +// from the range for the input image because of limited numerical precision. +// To guarantee an output range, for example `[0.0, 1.0]`, apply +// `tf.clip_by_value` to the output. // -// min_x: The float value that the lowest quantized `x` value represents. -// max_x: The float value that the highest quantized `x` value represents. -// min_y: The float value that the lowest quantized `y` value represents. -// max_y: The float value that the highest quantized `y` value represents. +// Each output pixel is computed by first transforming the pixel's footprint into +// the input tensor and then averaging the pixels that intersect the footprint. An +// input pixel's contribution to the average is weighted by the fraction of its +// area that intersects the footprint. This is the same as OpenCV's INTER_AREA. // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +// Arguments: +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. 
// -// *NOTE*: `QuantizedMul` supports limited forms of broadcasting. More about -// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedMulAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func ResizeArea(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeAreaAttr) (resized_images tf.Output) { if scope.Err() != nil { return } @@ -27903,207 +27899,271 @@ func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x a(attrs) } opspec := tf.OpSpec{ - Type: "QuantizedMul", + Type: "ResizeArea", Input: []tf.Input{ - x, y, min_x, max_x, min_y, max_y, + images, size, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// QuantizedAddAttr is an optional argument to QuantizedAdd. -type QuantizedAddAttr func(optionalAttr) - -// QuantizedAddToutput sets the optional Toutput attribute to value. -// If not specified, defaults to DT_QINT32 -func QuantizedAddToutput(value tf.DataType) QuantizedAddAttr { - return func(m optionalAttr) { - m["Toutput"] = value - } + return op.Output(0) } -// Returns x + y element-wise, working on quantized buffers. -// -// Arguments: +// Sends `input` to all devices that are connected to the output. // +// Sends `input` to all devices that are connected to the output. // -// min_x: The float value that the lowest quantized `x` value represents. -// max_x: The float value that the highest quantized `x` value represents. -// min_y: The float value that the lowest quantized `y` value represents. -// max_y: The float value that the highest quantized `y` value represents. 
+// The graph should be constructed so that all ops connected to the output have a +// valid device assignment, and the op itself is assigned one of these devices. // -// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +// input: The input to the broadcast. +// output: The same as input. +// shape: The shape of the input tensor. // -// *NOTE*: `QuantizedAdd` supports limited forms of broadcasting. More about -// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func QuantizedAdd(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedAddAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { +func NcclBroadcast(scope *Scope, input tf.Output, shape tf.Shape) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"shape": shape} opspec := tf.OpSpec{ - Type: "QuantizedAdd", + Type: "NcclBroadcast", Input: []tf.Input{ - x, y, min_x, max_x, min_y, max_y, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Given a quantized tensor described by (input, input_min, input_max), outputs a -// -// range that covers the actual values present in that tensor. This op is -// typically used to produce the requested_output_min and requested_output_max for -// Requantize. +// Computes the gradient of morphological 2-D dilation with respect to the filter. // // Arguments: +// input: 4-D with shape `[batch, in_height, in_width, depth]`. +// filter: 3-D with shape `[filter_height, filter_width, depth]`. +// out_backprop: 4-D with shape `[batch, out_height, out_width, depth]`. +// strides: 1-D of length 4. The stride of the sliding window for each dimension of +// the input tensor. 
Must be: `[1, stride_height, stride_width, 1]`. +// rates: 1-D of length 4. The input stride for atrous morphological dilation. +// Must be: `[1, rate_height, rate_width, 1]`. +// padding: The type of padding algorithm to use. // -// input_min: The float value that the minimum quantized input value represents. -// input_max: The float value that the maximum quantized input value represents. -// -// Returns The computed min output.the computed max output. -func RequantizationRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output) (output_min tf.Output, output_max tf.Output) { +// Returns 3-D with shape `[filter_height, filter_width, depth]`. +func Dilation2DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, rates []int64, padding string) (filter_backprop tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} opspec := tf.OpSpec{ - Type: "RequantizationRange", + Type: "Dilation2DBackpropFilter", Input: []tf.Input{ - input, input_min, input_max, + input, filter, out_backprop, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Rolls the elements of a tensor along an axis. +// AddSparseToTensorsMapAttr is an optional argument to AddSparseToTensorsMap. +type AddSparseToTensorsMapAttr func(optionalAttr) + +// AddSparseToTensorsMapContainer sets the optional container attribute to value. // -// The elements are shifted positively (towards larger indices) by the offset of -// `shift` along the dimension of `axis`. Negative `shift` values will shift -// elements in the opposite direction. Elements that roll passed the last position -// will wrap around to the first and vice versa. Multiple shifts along multiple -// axes may be specified. +// value: The container name for the `SparseTensorsMap` created by this op. 
+// If not specified, defaults to "" +func AddSparseToTensorsMapContainer(value string) AddSparseToTensorsMapAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// AddSparseToTensorsMapSharedName sets the optional shared_name attribute to value. // -// For example: +// value: The shared name for the `SparseTensorsMap` created by this op. +// If blank, the new Operation's unique name is used. +// If not specified, defaults to "" +func AddSparseToTensorsMapSharedName(value string) AddSparseToTensorsMapAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Add a `SparseTensor` to a `SparseTensorsMap` return its handle. // -// ``` -// # 't' is [0, 1, 2, 3, 4] -// roll(t, shift=2, axis=0) ==> [3, 4, 0, 1, 2] +// A `SparseTensor` is represented by three tensors: `sparse_indices`, +// `sparse_values`, and `sparse_shape`. // -// # shifting along multiple dimensions -// # 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] -// roll(t, shift=[1, -2], axis=[0, 1]) ==> [[7, 8, 9, 5, 6], [2, 3, 4, 0, 1]] +// This operator takes the given `SparseTensor` and adds it to a container +// object (a `SparseTensorsMap`). A unique key within this container is generated +// in the form of an `int64`, and this is the value that is returned. // -// # shifting along the same axis multiple times -// # 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] -// roll(t, shift=[2, -3], axis=[1, 1]) ==> [[1, 2, 3, 4, 0], [6, 7, 8, 9, 5]] -// ``` +// The `SparseTensor` can then be read out as part of a minibatch by passing +// the key as a vector element to `TakeManySparseFromTensorsMap`. To ensure +// the correct `SparseTensorsMap` is accessed, ensure that the same +// `container` and `shared_name` are passed to that Op. If no `shared_name` +// is provided here, instead use the *name* of the Operation created by calling +// `AddSparseToTensorsMap` as the `shared_name` passed to +// `TakeManySparseFromTensorsMap`. Ensure the Operations are colocated. 
// // Arguments: +// sparse_indices: 2-D. The `indices` of the `SparseTensor`. +// sparse_values: 1-D. The `values` of the `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the `SparseTensor`. // -// shift: Dimension must be 0-D or 1-D. `shift[i]` specifies the number of places by which -// elements are shifted positively (towards larger indices) along the dimension -// specified by `axis[i]`. Negative shifts will roll the elements in the opposite -// direction. -// axis: Dimension must be 0-D or 1-D. `axis[i]` specifies the dimension that the shift -// `shift[i]` should occur. If the same axis is referenced more than once, the -// total shift for that axis will be the sum of all the shifts that belong to that -// axis. -// -// Returns Has the same shape and size as the input. The elements are shifted -// positively (towards larger indices) by the offsets of `shift` along the -// dimensions of `axis`. -func Roll(scope *Scope, input tf.Output, shift tf.Output, axis tf.Output) (output tf.Output) { +// Returns 0-D. The handle of the `SparseTensor` now stored in the +// `SparseTensorsMap`. +func AddSparseToTensorsMap(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...AddSparseToTensorsMapAttr) (sparse_handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Roll", + Type: "AddSparseToTensorsMap", Input: []tf.Input{ - input, shift, axis, + sparse_indices, sparse_values, sparse_shape, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Looks up keys in a table, outputs the corresponding values. -// -// The tensor `keys` must of the same type as the keys of the table. -// The output `values` is of the type of the table values. -// -// The scalar `default_value` is the value output for keys not present in the -// table. It must also be of the same type as the table values. 
-// -// Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// +// Returns a list list which has the passed-in `Tensor` as last element and the other elements of the given list in `input_handle`. // -// Returns Same shape as `keys`. Values found in the table, or `default_values` -// for missing keys. -func LookupTableFindV2(scope *Scope, table_handle tf.Output, keys tf.Output, default_value tf.Output) (values tf.Output) { +// tensor: The tensor to put on the list. +// input_handle: The old list. +// output_handle: A list with the elements of the old list followed by tensor. +// element_dtype: the type of elements in the list. +// element_shape: a shape compatible with that of elements in the list. +func TensorListPushBack(scope *Scope, input_handle tf.Output, tensor tf.Output) (output_handle tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LookupTableFindV2", + Type: "TensorListPushBack", Input: []tf.Input{ - table_handle, keys, default_value, + input_handle, tensor, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Updates the table to associates keys with values. +// CudnnRNNCanonicalToParamsAttr is an optional argument to CudnnRNNCanonicalToParams. +type CudnnRNNCanonicalToParamsAttr func(optionalAttr) + +// CudnnRNNCanonicalToParamsRnnMode sets the optional rnn_mode attribute to value. +// If not specified, defaults to "lstm" +func CudnnRNNCanonicalToParamsRnnMode(value string) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["rnn_mode"] = value + } +} + +// CudnnRNNCanonicalToParamsInputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNCanonicalToParamsInputMode(value string) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["input_mode"] = value + } +} + +// CudnnRNNCanonicalToParamsDirection sets the optional direction attribute to value. 
+// If not specified, defaults to "unidirectional" +func CudnnRNNCanonicalToParamsDirection(value string) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["direction"] = value + } +} + +// CudnnRNNCanonicalToParamsDropout sets the optional dropout attribute to value. +// If not specified, defaults to 0 +func CudnnRNNCanonicalToParamsDropout(value float32) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["dropout"] = value + } +} + +// CudnnRNNCanonicalToParamsSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func CudnnRNNCanonicalToParamsSeed(value int64) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// CudnnRNNCanonicalToParamsSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNCanonicalToParamsSeed2(value int64) CudnnRNNCanonicalToParamsAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Converts CudnnRNN params from canonical form to usable form. // -// The tensor `keys` must be of the same type as the keys of the table. -// The tensor `values` must be of the type of the table values. +// Writes a set of weights into the opaque params buffer so they can be used in +// upcoming training or inferences. // -// Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// values: Values to associate with keys. +// Note that the params buffer may not be compatible across different GPUs. So any +// save and restoration should be converted to and from the canonical weights and +// biases. // -// Returns the created operation. -func LookupTableInsertV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { +// num_layers: Specifies the number of layers in the RNN model. +// num_units: Specifies the size of the hidden state. +// input_size: Specifies the size of the input state. 
+// weights: the canonical form of weights that can be used for saving +// and restoration. They are more likely to be compatible across different +// generations. +// biases: the canonical form of biases that can be used for saving +// and restoration. They are more likely to be compatible across different +// generations. +// num_params: number of parameter sets for all layers. +// Each layer may contain multiple parameter sets, with each set consisting of +// a weight matrix and a bias vector. +// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicate whether there is a linear projection between the input and +// The actual computation before the first layer. 'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. +// direction: Indicates whether a bidirectional model will be used. +// dir = (direction == bidirectional) ? 2 : 1 +// dropout: dropout probability. When set to 0., dropout is disabled. +// seed: the 1st part of a seed to initialize dropout. +// seed2: the 2nd part of a seed to initialize dropout. +func CudnnRNNCanonicalToParams(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, weights []tf.Output, biases []tf.Output, optional ...CudnnRNNCanonicalToParamsAttr) (params tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "LookupTableInsertV2", + Type: "CudnnRNNCanonicalToParams", Input: []tf.Input{ - table_handle, keys, values, + num_layers, num_units, input_size, tf.OutputList(weights), tf.OutputList(biases), }, + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Creates a `Dataset` that includes only 1/`num_shards` of this dataset. 
-// -// Arguments: -// -// num_shards: An integer representing the number of shards operating in parallel. -// index: An integer representing the current worker index. -// -// -func ShardDataset(scope *Scope, input_dataset tf.Output, num_shards tf.Output, index tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Creates a dataset containing elements of first component of `input_dataset` having true in the last component. +func FilterByLastComponentDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (output tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ShardDataset", + Type: "FilterByLastComponentDataset", Input: []tf.Input{ - input_dataset, num_shards, index, + input_dataset, }, Attrs: attrs, } @@ -28111,197 +28171,319 @@ func ShardDataset(scope *Scope, input_dataset tf.Output, num_shards tf.Output, i return op.Output(0) } -// Creates a dataset that batches and pads `batch_size` elements from the input. -// -// Arguments: +// Computes the absolute value of a tensor. // -// batch_size: A scalar representing the number of elements to accumulate in a -// batch. -// padded_shapes: A list of int64 tensors representing the desired padded shapes -// of the corresponding output components. These shapes may be partially -// specified, using `-1` to indicate that a particular dimension should be -// padded to the maximum size of all batch elements. -// padding_values: A list of scalars containing the padding value to use for -// each of the outputs. -// drop_remainder: A scalar representing whether the last batch should be dropped in case its size -// is smaller than desired. 
-// -func PaddedBatchDatasetV2(scope *Scope, input_dataset tf.Output, batch_size tf.Output, padded_shapes []tf.Output, padding_values []tf.Output, drop_remainder tf.Output, output_shapes []tf.Shape) (handle tf.Output) { +// Given a tensor `x`, this operation returns a tensor containing the absolute +// value of each element in `x`. For example, if x is an input element and y is +// an output element, this operation computes \\(y = |x|\\). +func Abs(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "PaddedBatchDatasetV2", + Type: "Abs", Input: []tf.Input{ - input_dataset, batch_size, tf.OutputList(padded_shapes), tf.OutputList(padding_values), drop_remainder, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns element-wise smallest integer not less than x. -func Ceil(scope *Scope, x tf.Output) (y tf.Output) { +// MaxPoolGradV2Attr is an optional argument to MaxPoolGradV2. +type MaxPoolGradV2Attr func(optionalAttr) + +// MaxPoolGradV2DataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolGradV2DataFormat(value string) MaxPoolGradV2Attr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Computes gradients of the maxpooling function. +// +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients w.r.t. the output of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. 
+// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns Gradients w.r.t. the input to `max_pool`. +func MaxPoolGradV2(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize tf.Output, strides tf.Output, padding string, optional ...MaxPoolGradV2Attr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"padding": padding} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Ceil", + Type: "MaxPoolGradV2", Input: []tf.Input{ - x, + orig_input, orig_output, grad, ksize, strides, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the number of elements in the given table. +// Restore a reader to a previously saved state. +// +// Not all Readers support being restored, so this can produce an +// Unimplemented error. // // Arguments: -// table_handle: Handle to the table. +// reader_handle: Handle to a Reader. +// state: Result of a ReaderSerializeState of a Reader with type +// matching reader_handle. // -// Returns Scalar that contains number of elements in the table. -func LookupTableSizeV2(scope *Scope, table_handle tf.Output) (size tf.Output) { +// Returns the created operation. +func ReaderRestoreStateV2(scope *Scope, reader_handle tf.Output, state tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LookupTableSizeV2", + Type: "ReaderRestoreStateV2", Input: []tf.Input{ - table_handle, + reader_handle, state, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// ResizeBilinearGradAttr is an optional argument to ResizeBilinearGrad. -type ResizeBilinearGradAttr func(optionalAttr) - -// ResizeBilinearGradAlignCorners sets the optional align_corners attribute to value. +// Inverse fast Fourier transform. 
// -// value: If true, the centers of the 4 corner pixels of the input and grad tensors are -// aligned. Defaults to false. -// If not specified, defaults to false -func ResizeBilinearGradAlignCorners(value bool) ResizeBilinearGradAttr { - return func(m optionalAttr) { - m["align_corners"] = value +// Computes the inverse 1-dimensional discrete Fourier transform over the +// inner-most dimension of `input`. +// +// Arguments: +// input: A complex tensor. +// +// Returns A complex tensor of the same shape as `input`. The inner-most +// dimension of `input` is replaced with its inverse 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifft +// @end_compatibility +func IFFT(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IFFT", + Input: []tf.Input{ + input, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the gradient of bilinear interpolation. +// 2D fast Fourier transform. +// +// Computes the 2-dimensional discrete Fourier transform over the inner-most +// 2 dimensions of `input`. // // Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, -// The image tensor that was resized. +// input: A complex tensor. // -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. -// Gradients with respect to the input image. Input image must have been -// float or double. -func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBilinearGradAttr) (output tf.Output) { +// Returns A complex tensor of the same shape as `input`. The inner-most 2 +// dimensions of `input` are replaced with their 2D Fourier transform. 
+// +// @compatibility(numpy) +// Equivalent to np.fft.fft2 +// @end_compatibility +func FFT2D(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResizeBilinearGrad", + Type: "FFT2D", Input: []tf.Input{ - grads, original_image, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Outputs all keys and values in the table. +// Inverse 2D fast Fourier transform. // -// Arguments: -// table_handle: Handle to the table. +// Computes the inverse 2-dimensional discrete Fourier transform over the +// inner-most 2 dimensions of `input`. // +// Arguments: +// input: A complex tensor. // +// Returns A complex tensor of the same shape as `input`. The inner-most 2 +// dimensions of `input` are replaced with their inverse 2D Fourier transform. // -// Returns Vector of all keys present in the table.Tensor of all values in the table. Indexed in parallel with `keys`. -func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) { +// @compatibility(numpy) +// Equivalent to np.fft.ifft2 +// @end_compatibility +func IFFT2D(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues} opspec := tf.OpSpec{ - Type: "LookupTableExportV2", + Type: "IFFT2D", Input: []tf.Input{ - table_handle, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Replaces the contents of the table with the specified keys and values. +// Inverse 3D real-valued fast Fourier transform. // -// The tensor `keys` must be of the same type as the keys of the table. -// The tensor `values` must be of the type of the table values. 
+// Computes the inverse 3-dimensional discrete Fourier transform of a real-valued +// signal over the inner-most 3 dimensions of `input`. +// +// The inner-most 3 dimensions of `input` are assumed to be the result of `RFFT3D`: +// The inner-most dimension contains the `fft_length / 2 + 1` unique components of +// the DFT of a real-valued signal. If `fft_length` is not provided, it is computed +// from the size of the inner-most 3 dimensions of `input`. If the FFT length used +// to compute `input` is odd, it should be provided since it cannot be inferred +// properly. +// +// Along each axis `IRFFT3D` is computed on, if `fft_length` (or +// `fft_length / 2 + 1` for the inner-most dimension) is smaller than the +// corresponding dimension of `input`, the dimension is cropped. If it is larger, +// the dimension is padded with zeros. // // Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys to look up. -// values: Values to associate with keys. +// input: A complex64 tensor. +// fft_length: An int32 tensor of shape [3]. The FFT length for each dimension. // -// Returns the created operation. -func LookupTableImportV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { +// Returns A float32 tensor of the same rank as `input`. The inner-most 3 +// dimensions of `input` are replaced with the `fft_length` samples of their +// inverse 3D real Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.irfftn with 3 dimensions. 
+// @end_compatibility +func IRFFT3D(scope *Scope, input tf.Output, fft_length tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LookupTableImportV2", + Type: "IRFFT3D", Input: []tf.Input{ - table_handle, keys, values, + input, fft_length, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// MultiDeviceIteratorFromStringHandleAttr is an optional argument to MultiDeviceIteratorFromStringHandle. -type MultiDeviceIteratorFromStringHandleAttr func(optionalAttr) - -// MultiDeviceIteratorFromStringHandleOutputTypes sets the optional output_types attribute to value. +// Returns the truth value of (x != y) element-wise. // -// value: The type list for the return values. -// If not specified, defaults to <> +// *NOTE*: `NotEqual` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func NotEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "NotEqual", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingMomentumParametersGradAccumDebug. +type LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr func(optionalAttr) + +// LoadTPUEmbeddingMomentumParametersGradAccumDebugTableId sets the optional table_id attribute to value. 
+// If not specified, defaults to -1 // -// REQUIRES: len(value) >= 0 -func MultiDeviceIteratorFromStringHandleOutputTypes(value []tf.DataType) MultiDeviceIteratorFromStringHandleAttr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingMomentumParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["output_types"] = value + m["table_id"] = value } } -// MultiDeviceIteratorFromStringHandleOutputShapes sets the optional output_shapes attribute to value. +// LoadTPUEmbeddingMomentumParametersGradAccumDebugTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func LoadTPUEmbeddingMomentumParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Load Momentum embedding parameters with debug support. // -// value: The list of shapes being produced. -// If not specified, defaults to <> +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. // -// REQUIRES: len(value) >= 0 -func MultiDeviceIteratorFromStringHandleOutputShapes(value []tf.Shape) MultiDeviceIteratorFromStringHandleAttr { +// Arguments: +// parameters: Value of parameters used in the Momentum optimization algorithm. +// momenta: Value of momenta used in the Momentum optimization algorithm. +// gradient_accumulators: Value of gradient_accumulators used in the Momentum optimization algorithm. +// +// +// +// Returns the created operation. 
+func LoadTPUEmbeddingMomentumParametersGradAccumDebug(scope *Scope, parameters tf.Output, momenta tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingMomentumParametersGradAccumDebugAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LoadTPUEmbeddingMomentumParametersGradAccumDebug", + Input: []tf.Input{ + parameters, momenta, gradient_accumulators, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// StatefulStandardNormalAttr is an optional argument to StatefulStandardNormal. +type StatefulStandardNormalAttr func(optionalAttr) + +// StatefulStandardNormalDtype sets the optional dtype attribute to value. +// +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatefulStandardNormalDtype(value tf.DataType) StatefulStandardNormalAttr { return func(m optionalAttr) { - m["output_shapes"] = value + m["dtype"] = value } } -// Generates a MultiDeviceIterator resource from its provided string handle. +// Outputs random values from a normal distribution. +// +// The generated values will have mean 0 and standard deviation 1. // // Arguments: -// string_handle: String representing the resource. +// resource: The handle of the resource variable that stores the state of the RNG. +// shape: The shape of the output tensor. // -// Returns A MultiDeviceIterator resource. -func MultiDeviceIteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ...MultiDeviceIteratorFromStringHandleAttr) (multi_device_iterator tf.Output) { +// Returns A tensor of the specified shape filled with random normal values. 
+func StatefulStandardNormal(scope *Scope, resource tf.Output, shape tf.Output, optional ...StatefulStandardNormalAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -28310,9 +28492,9 @@ func MultiDeviceIteratorFromStringHandle(scope *Scope, string_handle tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "MultiDeviceIteratorFromStringHandle", + Type: "StatefulStandardNormal", Input: []tf.Input{ - string_handle, + resource, shape, }, Attrs: attrs, } @@ -28320,116 +28502,225 @@ func MultiDeviceIteratorFromStringHandle(scope *Scope, string_handle tf.Output, return op.Output(0) } -// MutableHashTableV2Attr is an optional argument to MutableHashTableV2. -type MutableHashTableV2Attr func(optionalAttr) +// Computes the Gauss error function of `x` element-wise. +func Erf(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Erf", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// MutableHashTableV2Container sets the optional container attribute to value. +// Returns element-wise largest integer not greater than x. +func Floor(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Floor", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the number of records this Reader has produced. // -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func MutableHashTableV2Container(value string) MutableHashTableV2Attr { +// This is the same as the number of ReaderRead executions that have +// succeeded. +// +// Arguments: +// reader_handle: Handle to a Reader. 
+func ReaderNumRecordsProducedV2(scope *Scope, reader_handle tf.Output) (records_produced tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ReaderNumRecordsProducedV2", + Input: []tf.Input{ + reader_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// TensorListConcatAttr is an optional argument to TensorListConcat. +type TensorListConcatAttr func(optionalAttr) + +// TensorListConcatElementShape sets the optional element_shape attribute to value. +// If not specified, defaults to +func TensorListConcatElementShape(value tf.Shape) TensorListConcatAttr { return func(m optionalAttr) { - m["container"] = value + m["element_shape"] = value } } -// MutableHashTableV2SharedName sets the optional shared_name attribute to value. +// Concats all tensors in the list along the 0th dimension. // -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func MutableHashTableV2SharedName(value string) MutableHashTableV2Attr { +// Requires that all tensors have the same shape except the first dimension. +// +// input_handle: The input list. +// tensor: The concated result. +// lengths: Output tensor containing sizes of the 0th dimension of tensors in the list, used for computing the gradient. +// +func TensorListConcat(scope *Scope, input_handle tf.Output, element_dtype tf.DataType, optional ...TensorListConcatAttr) (tensor tf.Output, lengths tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"element_dtype": element_dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorListConcat", + Input: []tf.Input{ + input_handle, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Conv3DAttr is an optional argument to Conv3D. 
+type Conv3DAttr func(optionalAttr) + +// Conv3DDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func Conv3DDataFormat(value string) Conv3DAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["data_format"] = value } } -// MutableHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// Conv3DDilations sets the optional dilations attribute to value. // -// value: If true and shared_name is empty, the table is shared -// using the node name. -// If not specified, defaults to false -func MutableHashTableV2UseNodeNameSharing(value bool) MutableHashTableV2Attr { +// value: 1-D tensor of length 5. The dilation factor for each dimension of +// `input`. If set to k > 1, there will be k-1 skipped cells between each +// filter element on that dimension. The dimension order is determined by the +// value of `data_format`, see above for details. Dilations in the batch and +// depth dimensions must be 1. +// If not specified, defaults to +func Conv3DDilations(value []int64) Conv3DAttr { return func(m optionalAttr) { - m["use_node_name_sharing"] = value + m["dilations"] = value } } -// Creates an empty hash table. +// Computes a 3-D convolution given 5-D `input` and `filter` tensors. // -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a scalar. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. 
+// In signal processing, cross-correlation is a measure of similarity of +// two waveforms as a function of a time-lag applied to one of them. This +// is also known as a sliding dot product or sliding inner-product. // -// Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. +// Our Conv3D implements a form of cross-correlation. // -// Returns Handle to a table. -func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableV2Attr) (table_handle tf.Output) { +// Arguments: +// input: Shape `[batch, in_depth, in_height, in_width, in_channels]`. +// filter: Shape `[filter_depth, filter_height, filter_width, in_channels, +// out_channels]`. `in_channels` must match between `input` and `filter`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, padding string, optional ...Conv3DAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MutableHashTableV2", - + Type: "Conv3D", + Input: []tf.Input{ + input, filter, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// DequantizeAttr is an optional argument to Dequantize. -type DequantizeAttr func(optionalAttr) +// QuantizeV2Attr is an optional argument to QuantizeV2. +type QuantizeV2Attr func(optionalAttr) -// DequantizeMode sets the optional mode attribute to value. +// QuantizeV2Mode sets the optional mode attribute to value. 
// If not specified, defaults to "MIN_COMBINED" -func DequantizeMode(value string) DequantizeAttr { +func QuantizeV2Mode(value string) QuantizeV2Attr { return func(m optionalAttr) { m["mode"] = value } } -// Dequantize the 'input' tensor into a float Tensor. +// QuantizeV2RoundMode sets the optional round_mode attribute to value. +// If not specified, defaults to "HALF_AWAY_FROM_ZERO" +func QuantizeV2RoundMode(value string) QuantizeV2Attr { + return func(m optionalAttr) { + m["round_mode"] = value + } +} + +// Quantize the 'input' tensor of type float to 'output' tensor of type 'T'. // // [min_range, max_range] are scalar floats that specify the range for // the 'input' data. The 'mode' attribute controls exactly which calculations are -// used to convert the float values to their quantized equivalents. +// used to convert the float values to their quantized equivalents. The +// 'round_mode' attribute controls which rounding tie-breaking algorithm is used +// when rounding float values to their quantized equivalents. // // In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: // // ``` -// if T == qint8: in[i] += (range(T) + 1)/ 2.0 -// out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) +// out[i] = (in[i] - min_range) * range(T) / (max_range - min_range) +// if T == qint8: out[i] -= (range(T) + 1) / 2.0 // ``` +// // here `range(T) = numeric_limits::max() - numeric_limits::min()` // // *MIN_COMBINED Mode Example* // -// If the input comes from a QuantizedRelu6, the output type is -// quint8 (range of 0-255) but the possible range of QuantizedRelu6 is -// 0-6. The min_range and max_range values are therefore 0.0 and 6.0. -// Dequantize on quint8 will take each value, cast to float, and multiply -// by 6 / 255. -// Note that if quantizedtype is qint8, the operation will additionally add -// each value by 128 prior to casting. 
+// Assume the input is type float and has a possible range of [0.0, 6.0] and the +// output type is quint8 ([0, 255]). The min_range and max_range values should be +// specified as 0.0 and 6.0. Quantizing from float to quint8 will multiply each +// value of the input by 255/6 and cast to quint8. +// +// If the output type was qint8 ([-128, 127]), the operation will additionally +// subtract each value by 128 prior to casting, so that the range of values aligns +// with the range of qint8. // // If the mode is 'MIN_FIRST', then this approach is used: // -// ```c++ +// ``` // num_discrete_values = 1 << (# of bits in T) // range_adjust = num_discrete_values / (num_discrete_values - 1) // range = (range_max - range_min) * range_adjust -// range_scale = range / num_discrete_values -// const double offset_input = static_cast(input) - lowest_quantized; -// result = range_min + ((input - numeric_limits::min()) * range_scale) +// range_scale = num_discrete_values / range +// quantized = round(input * range_scale) - round(range_min * range_scale) + +// numeric_limits::min() +// quantized = max(quantized, numeric_limits::min()) +// quantized = min(quantized, numeric_limits::max()) // ``` // +// The biggest difference between this and MIN_COMBINED is that the minimum range +// is rounded first, before it's subtracted from the rounded value. With +// MIN_COMBINED, a small bias is introduced where repeated iterations of quantizing +// and dequantizing will introduce a larger and larger error. +// // *SCALED mode Example* // // `SCALED` mode matches the quantization approach used in @@ -28442,6 +28733,7 @@ func DequantizeMode(value string) DequantizeAttr { // // We first find the range of values in our tensor. 
The // range we use is always centered on 0, so we find m such that +// // ```c++ // m = max(abs(input_min), abs(input_max)) // ``` @@ -28450,6 +28742,7 @@ func DequantizeMode(value string) DequantizeAttr { // // Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. // If T is signed, this is +// // ``` // num_bits = sizeof(T) * 8 // [min_fixed, max_fixed] = @@ -28457,387 +28750,308 @@ func DequantizeMode(value string) DequantizeAttr { // ``` // // Otherwise, if T is unsigned, the fixed-point range is +// // ``` // [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] // ``` // // From this we compute our scaling factor, s: +// // ```c++ -// s = (2 * m) / (max_fixed - min_fixed) +// s = (max_fixed - min_fixed) / (2 * m) // ``` // -// Now we can dequantize the elements of our tensor: +// Now we can quantize the elements of our tensor: +// // ```c++ -// result = input * s +// result = round(input * s) // ``` // -// Arguments: -// -// min_range: The minimum scalar value possibly produced for the input. -// max_range: The maximum scalar value possibly produced for the input. -func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, optional ...DequantizeAttr) (output tf.Output) { +// One thing to watch out for is that the operator may choose to adjust the +// requested minimum and maximum values slightly during the quantization process, +// so you should always use the output ports as the range for further calculations. +// For example, if the requested minimum and maximum values are close to equal, +// they will be separated by a small epsilon value to prevent ill-formed quantized +// buffers from being created. Otherwise, you can end up with buffers where all the +// quantized values map to the same float value, which causes problems for +// operations that have to perform further calculations on them. +// +// Arguments: +// +// min_range: The minimum scalar value possibly produced for the input. 
+// max_range: The maximum scalar value possibly produced for the input. +// +// +// Returns The quantized data produced from the float input.The actual minimum scalar value used for the output.The actual maximum scalar value used for the output. +func QuantizeV2(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, T tf.DataType, optional ...QuantizeV2Attr) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"T": T} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Dequantize", + Type: "QuantizeV2", Input: []tf.Input{ input, min_range, max_range, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Flips all bits elementwise. -// -// The result will have exactly those bits set, that are not set in `x`. The -// computation is performed on the underlying representation of x. -func Invert(scope *Scope, x tf.Output) (y tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Invert", - Input: []tf.Input{ - x, - }, +// ComplexAbsAttr is an optional argument to ComplexAbs. +type ComplexAbsAttr func(optionalAttr) + +// ComplexAbsTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func ComplexAbsTout(value tf.DataType) ComplexAbsAttr { + return func(m optionalAttr) { + m["Tout"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Deserialize bucket boundaries and ready flag into current QuantileAccumulator. -// -// An op that deserializes bucket boundaries and are boundaries ready flag into current QuantileAccumulator. -// -// Arguments: -// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. -// bucket_boundaries: float; List of Rank 1 Tensors each containing the bucket boundaries for a feature. 
+// Computes the complex absolute value of a tensor. // -// Returns the created operation. -func BoostedTreesQuantileStreamResourceDeserialize(scope *Scope, quantile_stream_resource_handle tf.Output, bucket_boundaries []tf.Output) (o *tf.Operation) { +// Given a tensor `x` of complex numbers, this operation returns a tensor of type +// `float` or `double` that is the absolute value of each element in `x`. All +// elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute +// value is computed as \\( \sqrt{a^2 + b^2}\\). +func ComplexAbs(scope *Scope, x tf.Output, optional ...ComplexAbsAttr) (y tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BoostedTreesQuantileStreamResourceDeserialize", + Type: "ComplexAbs", Input: []tf.Input{ - quantile_stream_resource_handle, tf.OutputList(bucket_boundaries), + x, }, + Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Inverse 3D fast Fourier transform. -// -// Computes the inverse 3-dimensional discrete Fourier transform over the -// inner-most 3 dimensions of `input`. -// -// Arguments: -// input: A complex64 tensor. -// -// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 -// dimensions of `input` are replaced with their inverse 3D Fourier transform. +// Returns the truth value of x AND y element-wise. // -// @compatibility(numpy) -// Equivalent to np.fft.ifftn with 3 dimensions. -// @end_compatibility -func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { +// *NOTE*: `LogicalAnd` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func LogicalAnd(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "IFFT3D", + Type: "LogicalAnd", Input: []tf.Input{ - input, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Deprecated. Disallowed in GraphDef version >= 2. +// Computes the reciprocal of x element-wise. // -// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead -func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) { +// I.e., \\(y = 1 / x\\). +func Inv(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "AdjustContrast", + Type: "Inv", Input: []tf.Input{ - images, contrast_factor, min_value, max_value, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Table initializer that takes two tensors for keys and values respectively. +// Creates a dataset that batches input elements into a SparseTensor. // // Arguments: -// table_handle: Handle to a table which will be initialized. -// keys: Keys of type Tkey. -// values: Values of type Tval. +// input_dataset: A handle to an input dataset. Must have a single component. +// batch_size: A scalar representing the number of elements to accumulate in a +// batch. +// row_shape: A vector representing the dense shape of each row in the produced +// SparseTensor. The shape may be partially specified, using `-1` to indicate +// that a particular dimension should use the maximum size of all batch elements. // -// Returns the created operation. 
-func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { +// +func ExperimentalDenseToSparseBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, row_shape tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "InitializeTableV2", + Type: "ExperimentalDenseToSparseBatchDataset", Input: []tf.Input{ - table_handle, keys, values, + input_dataset, batch_size, row_shape, }, + Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// PrintAttr is an optional argument to Print. -type PrintAttr func(optionalAttr) - -// PrintMessage sets the optional message attribute to value. -// -// value: A string, prefix of the error message. -// If not specified, defaults to "" -func PrintMessage(value string) PrintAttr { - return func(m optionalAttr) { - m["message"] = value - } -} - -// PrintFirstN sets the optional first_n attribute to value. -// -// value: Only log `first_n` number of times. -1 disables logging. -// If not specified, defaults to -1 -func PrintFirstN(value int64) PrintAttr { - return func(m optionalAttr) { - m["first_n"] = value - } -} - -// PrintSummarize sets the optional summarize attribute to value. -// -// value: Only print this many entries of each tensor. -// If not specified, defaults to 3 -func PrintSummarize(value int64) PrintAttr { - return func(m optionalAttr) { - m["summarize"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Prints a list of tensors. -// -// Passes `input` through to `output` and prints `data` when evaluating. -// -// Arguments: -// input: The tensor passed to `output` -// data: A list of tensors to print out when op is evaluated. +// Computes the reciprocal of x element-wise. 
// -// Returns = The unmodified `input` tensor -func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) { +// I.e., \\(y = 1 / x\\). +func Reciprocal(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Print", + Type: "Reciprocal", Input: []tf.Input{ - input, tf.OutputList(data), + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// QueueEnqueueManyV2Attr is an optional argument to QueueEnqueueManyV2. -type QueueEnqueueManyV2Attr func(optionalAttr) +// Conv3DBackpropFilterAttr is an optional argument to Conv3DBackpropFilter. +type Conv3DBackpropFilterAttr func(optionalAttr) -// QueueEnqueueManyV2TimeoutMs sets the optional timeout_ms attribute to value. -// -// value: If the queue is too full, this operation will block for up -// to timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueEnqueueManyV2TimeoutMs(value int64) QueueEnqueueManyV2Attr { +// Conv3DBackpropFilterDilations sets the optional dilations attribute to value. +// If not specified, defaults to +func Conv3DBackpropFilterDilations(value []int64) Conv3DBackpropFilterAttr { return func(m optionalAttr) { - m["timeout_ms"] = value + m["dilations"] = value } } -// Enqueues zero or more tuples of one or more tensors in the given queue. -// -// This operation slices each component tensor along the 0th dimension to -// make multiple queue elements. All of the tuple components must have the -// same size in the 0th dimension. -// -// The components input has k elements, which correspond to the components of -// tuples stored in the given queue. +// Computes the gradients of 3-D convolution with respect to the filter. // -// N.B. 
If the queue is full, this operation will block until the given -// elements have been enqueued (or 'timeout_ms' elapses, if specified). +// DEPRECATED at GraphDef version 10: Use Conv3DBackpropFilterV2 // // Arguments: -// handle: The handle to a queue. -// components: One or more tensors from which the enqueued tensors should -// be taken. -// -// Returns the created operation. -func QueueEnqueueManyV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueManyV2Attr) (o *tf.Operation) { +// input: Shape `[batch, depth, rows, cols, in_channels]`. +// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. +// `in_channels` must match between `input` and `filter`. +// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, +// out_channels]`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3DBackpropFilter(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv3DBackpropFilterAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"strides": strides, "padding": padding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "QueueEnqueueManyV2", + Type: "Conv3DBackpropFilter", Input: []tf.Input{ - handle, tf.OutputList(components), + input, filter, out_backprop, }, Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// PrintV2Attr is an optional argument to PrintV2. -type PrintV2Attr func(optionalAttr) - -// PrintV2OutputStream sets the optional output_stream attribute to value. -// -// value: A string specifying the output stream or logging level to print to. 
-// If not specified, defaults to "stderr" -func PrintV2OutputStream(value string) PrintV2Attr { - return func(m optionalAttr) { - m["output_stream"] = value - } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Prints a string scalar. -// -// Prints a string scalar to the desired output_stream. -// -// Arguments: -// input: The string scalar to print. +// Computes square root of x element-wise. // -// Returns the created operation. -func PrintV2(scope *Scope, input tf.Output, optional ...PrintV2Attr) (o *tf.Operation) { +// I.e., \\(y = \sqrt{x} = x^{1/2}\\). +func Sqrt(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "PrintV2", + Type: "Sqrt", Input: []tf.Input{ - input, + x, }, - Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Outputs a `Summary` protocol buffer with a tensor and per-plugin data. +// Get the value of the tensor specified by its handle. // // Arguments: -// tag: A string attached to this summary. Used for organization in TensorBoard. -// tensor: A tensor to serialize. -// serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin -// data. -func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) { +// handle: The handle for a tensor stored in the session state. +// dtype: The type of the output value. +// +// Returns The tensor for the given handle. 
+func GetSessionTensor(scope *Scope, handle tf.Output, dtype tf.DataType) (value tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "TensorSummaryV2", + Type: "GetSessionTensor", Input: []tf.Input{ - tag, tensor, serialized_summary_metadata, + handle, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that asynchronously prefetches elements from `input_dataset`. -// -// Arguments: -// -// buffer_size: The maximum number of elements to buffer in an iterator over -// this dataset. -// +// Computes the gradient for the sqrt of `x` wrt its input. // -func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Specifically, `grad = dy * 0.5 / y`, where `y = sqrt(x)`, and `dy` +// is the corresponding input gradient. +func SqrtGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "PrefetchDataset", + Type: "SqrtGrad", Input: []tf.Input{ - input_dataset, buffer_size, + y, dy, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// TensorSummaryAttr is an optional argument to TensorSummary. -type TensorSummaryAttr func(optionalAttr) +// MatrixInverseAttr is an optional argument to MatrixInverse. +type MatrixInverseAttr func(optionalAttr) -// TensorSummaryDescription sets the optional description attribute to value. -// -// value: A json-encoded SummaryDescription proto. -// If not specified, defaults to "" -func TensorSummaryDescription(value string) TensorSummaryAttr { +// MatrixInverseAdjoint sets the optional adjoint attribute to value. 
+// If not specified, defaults to false +func MatrixInverseAdjoint(value bool) MatrixInverseAttr { return func(m optionalAttr) { - m["description"] = value + m["adjoint"] = value } } -// TensorSummaryLabels sets the optional labels attribute to value. +// Computes the inverse of one or more square invertible matrices or their // -// value: An unused list of strings. -// If not specified, defaults to <> -func TensorSummaryLabels(value []string) TensorSummaryAttr { - return func(m optionalAttr) { - m["labels"] = value - } -} - -// TensorSummaryDisplayName sets the optional display_name attribute to value. +// adjoints (conjugate transposes). // -// value: An unused string. -// If not specified, defaults to "" -func TensorSummaryDisplayName(value string) TensorSummaryAttr { - return func(m optionalAttr) { - m["display_name"] = value - } -} - -// Outputs a `Summary` protocol buffer with a tensor. +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. The output is a tensor of the same shape as the input +// containing the inverse for all input submatrices `[..., :, :]`. // -// This op is being phased out in favor of TensorSummaryV2, which lets callers pass -// a tag as well as a serialized SummaryMetadata proto string that contains -// plugin-specific data. We will keep this op to maintain backwards compatibility. +// The op uses LU decomposition with partial pivoting to compute the inverses. +// +// If a matrix is not invertible there is no guarantee what the op does. It +// may detect the condition and raise an exception or it may simply return a +// garbage result. // // Arguments: -// tensor: A tensor to serialize. -func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) { +// input: Shape is `[..., M, M]`. +// +// Returns Shape is `[..., M, M]`. 
+// +// @compatibility(numpy) +// Equivalent to np.linalg.inv +// @end_compatibility +func MatrixInverse(scope *Scope, input tf.Output, optional ...MatrixInverseAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -28846,9 +29060,9 @@ func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr a(attrs) } opspec := tf.OpSpec{ - Type: "TensorSummary", + Type: "MatrixInverse", Input: []tf.Input{ - tensor, + input, }, Attrs: attrs, } @@ -28856,552 +29070,444 @@ func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr return op.Output(0) } -// Read an element from the TensorArray into output `value`. -// -// Arguments: -// handle: The handle to a TensorArray. -// -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. +// Computes reciprocal of square root of x element-wise. // -// Returns The tensor that is read from the TensorArray. -func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { +// I.e., \\(y = 1 / \sqrt{x}\\). +func Rsqrt(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} opspec := tf.OpSpec{ - Type: "TensorArrayReadV3", + Type: "Rsqrt", Input: []tf.Input{ - handle, index, flow_in, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Reduces sparse updates into the variable referenced by `resource` using the `max` operation. -// -// This operation computes -// -// # Scalar indices -// ref[indices, ...] = max(ref[indices, ...], updates[...]) -// -// # Vector indices (for each i) -// ref[indices[i], ...] = max(ref[indices[i], ...], updates[i, ...]) -// -// # High rank indices (for each i, ..., j) -// ref[indices[i, ..., j], ...] 
= max(ref[indices[i, ..., j], ...], updates[i, ..., j, ...]) -// -// Duplicate entries are handled correctly: if multiple `indices` reference -// the same location, their contributions are combined. -// -// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. -// -//
-// -//
-// -// Arguments: -// resource: Should be from a `Variable` node. -// indices: A tensor of indices into the first dimension of `ref`. -// updates: A tensor of updated values to add to `ref`. +// Rounds the values of a tensor to the nearest integer, element-wise. // -// Returns the created operation. -func ResourceScatterMax(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { +// Rounds half to even. Also known as bankers rounding. If you want to round +// according to the current system rounding mode use std::cint. +func Round(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ResourceScatterMax", + Type: "Round", Input: []tf.Input{ - resource, indices, updates, + x, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the gradient for the tanh of `x` wrt its input. +// Delete the TensorArray from its resource container. // -// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy` -// is the corresponding input gradient. -func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { +// This enables the user to close and release the resource in the middle +// of a step/run. +// +// Arguments: +// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). +// +// Returns the created operation. +func TensorArrayCloseV3(scope *Scope, handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TanhGrad", + Type: "TensorArrayCloseV3", Input: []tf.Input{ - y, dy, + handle, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Outputs a `Summary` protocol buffer with scalar values. -// -// The input `tags` and `values` must have the same shape. The generated summary -// has a summary value for each tag-value pair in `tags` and `values`. 
-// -// Arguments: -// tags: Tags for the summary. -// values: Same shape as `tags. Values for the summary. -// -// Returns Scalar. Serialized `Summary` protocol buffer. -func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) { +// Computes exponential of x element-wise. \\(y = e^x\\). +func Exp(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ScalarSummary", + Type: "Exp", Input: []tf.Input{ - tags, values, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Outputs a `Summary` protocol buffer with a histogram. +// NthElementAttr is an optional argument to NthElement. +type NthElementAttr func(optionalAttr) + +// NthElementReverse sets the optional reverse attribute to value. // -// The generated -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// has one summary value containing a histogram for `values`. +// value: When set to True, find the nth-largest value in the vector and vice +// versa. +// If not specified, defaults to false +func NthElementReverse(value bool) NthElementAttr { + return func(m optionalAttr) { + m["reverse"] = value + } +} + +// Finds values of the `n`-th order statistic for the last dimension. // -// This op reports an `InvalidArgument` error if any value is not finite. +// If the input is a vector (rank-1), finds the entries which is the nth-smallest +// value in the vector and outputs their values as scalar tensor. +// +// For matrices (resp. higher rank input), computes the entries which is the +// nth-smallest value in each row (resp. vector along the last dimension). Thus, +// +// values.shape = input.shape[:-1] // // Arguments: -// tag: Scalar. Tag to use for the `Summary.Value`. -// values: Any shape. Values to use to build the histogram. +// input: 1-D or higher with last dimension at least `n+1`. +// n: 0-D. 
Position of sorted vector to select along the last dimension (along +// each row for matrices). Valid range of n is `[0, input.shape[:-1])` // -// Returns Scalar. Serialized `Summary` protocol buffer. -func HistogramSummary(scope *Scope, tag tf.Output, values tf.Output) (summary tf.Output) { +// Returns The `n`-th order statistic along each last dimensional slice. +func NthElement(scope *Scope, input tf.Output, n tf.Output, optional ...NthElementAttr) (values tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "HistogramSummary", + Type: "NthElement", Input: []tf.Input{ - tag, values, + input, n, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the number of elements in the given queue. +// Computes the maximum along segments of a tensor. // -// Arguments: -// handle: The handle to a queue. +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. // -// Returns The number of elements in the given queue. -func QueueSizeV2(scope *Scope, handle tf.Output) (size tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "QueueSizeV2", - Input: []tf.Input{ - handle, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ImageSummaryAttr is an optional argument to ImageSummary. -type ImageSummaryAttr func(optionalAttr) - -// ImageSummaryMaxImages sets the optional max_images attribute to value. -// -// value: Max number of batch elements to generate images for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func ImageSummaryMaxImages(value int64) ImageSummaryAttr { - return func(m optionalAttr) { - m["max_images"] = value - } -} - -// ImageSummaryBadColor sets the optional bad_color attribute to value. 
+// This operator is similar to the unsorted segment sum operator found +// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). +// Instead of computing the sum over segments, it computes the maximum such that: // -// value: Color to use for pixels with non-finite values. -// If not specified, defaults to > int_val:255 int_val:0 int_val:0 int_val:255 > -func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { - return func(m optionalAttr) { - m["bad_color"] = value - } -} - -// Outputs a `Summary` protocol buffer with images. +// \\(output_i = \max_{j...} data[j...]\\) where max is over tuples `j...` such +// that `segment_ids[j...] == i`. // -// The summary has up to `max_images` summary values containing images. The -// images are built from `tensor` which must be 4-D with shape `[batch_size, -// height, width, channels]` and where `channels` can be: +// If the maximum is empty for a given segment ID `i`, it outputs the smallest +// possible value for the specific numeric type, +// `output[i] = numeric_limits::lowest()`. // -// * 1: `tensor` is interpreted as Grayscale. -// * 3: `tensor` is interpreted as RGB. -// * 4: `tensor` is interpreted as RGBA. +// If the given segment ID `i` is negative, then the corresponding value is +// dropped, and will not be included in the result. // -// The images have the same number of channels as the input tensor. For float -// input, the values are normalized one image at a time to fit in the range -// `[0, 255]`. `uint8` values are unchanged. The op uses two different -// normalization algorithms: +//
+// +//
// -// * If the input values are all positive, they are rescaled so the largest one -// is 255. +// For example: // -// * If any input value is negative, the values are shifted so input value 0.0 -// is at 127. They are then rescaled so that either the smallest value is 0, -// or the largest one is 255. +// ``` python +// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) +// tf.unsorted_segment_max(c, tf.constant([0, 1, 0]), num_segments=2) +// # ==> [[ 4, 3, 3, 4], +// # [5, 6, 7, 8]] +// ``` // -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: // -// * If `max_images` is 1, the summary value tag is '*tag*/image'. -// * If `max_images` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. +// Arguments: // -// The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. -// Each element must be in the range `[0, 255]` (It represents the value of a -// pixel in the output image). Non-finite values in the input tensor are -// replaced by this tensor in the output image. The default value is the color -// red. +// segment_ids: A tensor whose shape is a prefix of `data.shape`. // -// Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 4-D of shape `[batch_size, height, width, channels]` where -// `channels` is 1, 3, or 4. // -// Returns Scalar. Serialized `Summary` protocol buffer. -func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...ImageSummaryAttr) (summary tf.Output) { +// Returns Has same shape as data, except for the first `segment_ids.rank` +// dimensions, which are replaced with a single dimension which has size +// `num_segments`. 
+func UnsortedSegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ImageSummary", + Type: "UnsortedSegmentMax", Input: []tf.Input{ - tag, tensor, + data, segment_ids, num_segments, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. -type AudioSummaryV2Attr func(optionalAttr) - -// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. -// -// value: Max number of batch elements to generate audio for. -// If not specified, defaults to 3 -// -// REQUIRES: value >= 1 -func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { - return func(m optionalAttr) { - m["max_outputs"] = value - } -} - -// Outputs a `Summary` protocol buffer with audio. -// -// The summary has up to `max_outputs` summary values containing audio. The -// audio is built from `tensor` which must be 3-D with shape `[batch_size, -// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are -// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. -// -// The `tag` argument is a scalar `Tensor` of type `string`. It is used to -// build the `tag` of the summary values: -// -// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. -// * If `max_outputs` is greater than 1, the summary value tags are -// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. -// -// Arguments: -// tag: Scalar. Used to build the `tag` attribute of the summary values. -// tensor: 2-D of shape `[batch_size, frames]`. -// sample_rate: The sample rate of the signal in hertz. -// -// Returns Scalar. Serialized `Summary` protocol buffer. 
-func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { +// Computes softplus: `log(exp(features) + 1)`. +func Softplus(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "AudioSummaryV2", + Type: "Softplus", Input: []tf.Input{ - tag, tensor, sample_rate, + features, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Splits a tensor into a list. -// -// list[i] corresponds to lengths[i] tensors from the input tensor. -// The tensor must have rank at least 1 and contain exactly sum(lengths) elements. +// Computes exponential of x - 1 element-wise. // -// tensor: The input tensor. -// element_shape: A shape compatible with that of elements in the tensor. -// lengths: Vector of sizes of the 0th dimension of tensors in the list. -// output_handle: The list. -func TensorListSplit(scope *Scope, tensor tf.Output, element_shape tf.Output, lengths tf.Output) (output_handle tf.Output) { +// I.e., \\(y = (\exp x) - 1\\). +func Expm1(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorListSplit", + Type: "Expm1", Input: []tf.Input{ - tensor, element_shape, lengths, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// AvgPoolAttr is an optional argument to AvgPool. -type AvgPoolAttr func(optionalAttr) - -// AvgPoolDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. 
-// If not specified, defaults to "NHWC" -func AvgPoolDataFormat(value string) AvgPoolAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Performs average pooling on the input. -// -// Each entry in `output` is the mean of the corresponding size `ksize` -// window in `value`. -// -// Arguments: -// value: 4-D with shape `[batch, height, width, channels]`. -// ksize: The size of the sliding window for each dimension of `value`. -// strides: The stride of the sliding window for each dimension of `value`. -// padding: The type of padding algorithm to use. +// Computes natural logarithm of x element-wise. // -// Returns The average pooled output tensor. -func AvgPool(scope *Scope, value tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolAttr) (output tf.Output) { +// I.e., \\(y = \log_e x\\). +func Log(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "AvgPool", + Type: "Log", Input: []tf.Input{ - value, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Merges summaries. -// -// This op creates a -// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) -// protocol buffer that contains the union of all the values in the input -// summaries. +// Returns the index of a data point that should be added to the seed set. // -// When the Op is run, it reports an `InvalidArgument` error if multiple values -// in the summaries to merge use the same tag. +// Entries in distances are assumed to be squared distances of candidate points to +// the already sampled centers in the seed set. The op constructs one Markov chain +// of the k-MC^2 algorithm and returns the index of one candidate point to be added +// as an additional cluster center. 
// // Arguments: -// inputs: Can be of any shape. Each must contain serialized `Summary` protocol -// buffers. +// distances: Vector with squared distances to the closest previously sampled cluster center +// for each candidate point. +// seed: Scalar. Seed for initializing the random number generator. // -// Returns Scalar. Serialized `Summary` protocol buffer. -func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) { +// Returns Scalar with the index of the sampled point. +func KMC2ChainInitialization(scope *Scope, distances tf.Output, seed tf.Output) (index tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "MergeSummary", + Type: "KMC2ChainInitialization", Input: []tf.Input{ - tf.OutputList(inputs), + distances, seed, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// The shape of the elements of the given list, as a tensor. -// -// input_handle: the list -// element_shape: the shape of elements of the list -func TensorListElementShape(scope *Scope, input_handle tf.Output, shape_type tf.DataType) (element_shape tf.Output) { +// Computes hyperbolic sine of x element-wise. +func Sinh(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"shape_type": shape_type} opspec := tf.OpSpec{ - Type: "TensorListElementShape", + Type: "Sinh", Input: []tf.Input{ - input_handle, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the item in the list with the given index. +// Computes the sum along sparse segments of a tensor. // -// input_handle: the list -// index: the position in the list from which an element will be retrieved -// item: the element at that position +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. 
+// +// Like `SegmentSum`, but `segment_ids` can have rank less than `data`'s first +// dimension, selecting a subset of dimension 0, specified by `indices`. // +// For example: // -func TensorListGetItem(scope *Scope, input_handle tf.Output, index tf.Output, element_shape tf.Output, element_dtype tf.DataType) (item tf.Output) { +// ```python +// c = tf.constant([[1,2,3,4], [-1,-2,-3,-4], [5,6,7,8]]) +// +// # Select two rows, one segment. +// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 0])) +// # => [[0 0 0 0]] +// +// # Select two rows, two segment. +// tf.sparse_segment_sum(c, tf.constant([0, 1]), tf.constant([0, 1])) +// # => [[ 1 2 3 4] +// # [-1 -2 -3 -4]] +// +// # Select all rows, two segments. +// tf.sparse_segment_sum(c, tf.constant([0, 1, 2]), tf.constant([0, 0, 1])) +// # => [[0 0 0 0] +// # [5 6 7 8]] +// +// # Which is equivalent to: +// tf.segment_sum(c, tf.constant([0, 0, 1])) +// ``` +// +// Arguments: +// +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SparseSegmentSum(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"element_dtype": element_dtype} opspec := tf.OpSpec{ - Type: "TensorListGetItem", + Type: "SparseSegmentSum", Input: []tf.Input{ - input_handle, index, element_shape, + data, indices, segment_ids, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Resizes the list. -// -// -// input_handle: the input list -// size: size of the output list -// -func TensorListResize(scope *Scope, input_handle tf.Output, size tf.Output) (output_handle tf.Output) { +// CastAttr is an optional argument to Cast. 
+type CastAttr func(optionalAttr) + +// CastTruncate sets the optional Truncate attribute to value. +// If not specified, defaults to false +func CastTruncate(value bool) CastAttr { + return func(m optionalAttr) { + m["Truncate"] = value + } +} + +// Cast x of type SrcT to y of DstT. +func Cast(scope *Scope, x tf.Output, DstT tf.DataType, optional ...CastAttr) (y tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"DstT": DstT} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TensorListResize", + Type: "Cast", Input: []tf.Input{ - input_handle, size, + x, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns a diagonal tensor with a given diagonal values. -// -// Given a `diagonal`, this operation returns a tensor with the `diagonal` and -// everything else padded with zeros. The diagonal is computed as follows: -// -// Assume `diagonal` has dimensions [D1,..., Dk], then the output is a tensor of -// rank 2k with dimensions [D1,..., Dk, D1,..., Dk] where: -// -// `output[i1,..., ik, i1,..., ik] = diagonal[i1, ..., ik]` and 0 everywhere else. -// -// For example: -// -// ``` -// # 'diagonal' is [1, 2, 3, 4] -// tf.diag(diagonal) ==> [[1, 0, 0, 0] -// [0, 2, 0, 0] -// [0, 0, 3, 0] -// [0, 0, 0, 4]] -// ``` -// -// Arguments: -// diagonal: Rank k tensor where k is at most 1. -func Diag(scope *Scope, diagonal tf.Output) (output tf.Output) { +// Computes the log of the absolute value of `Gamma(x)` element-wise. +func Lgamma(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Diag", + Type: "Lgamma", Input: []tf.Input{ - diagonal, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ParameterizedTruncatedNormalAttr is an optional argument to ParameterizedTruncatedNormal. -type ParameterizedTruncatedNormalAttr func(optionalAttr) +// UnicodeEncodeAttr is an optional argument to UnicodeEncode. 
+type UnicodeEncodeAttr func(optionalAttr) -// ParameterizedTruncatedNormalSeed sets the optional seed attribute to value. +// UnicodeEncodeErrors sets the optional errors attribute to value. // -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func ParameterizedTruncatedNormalSeed(value int64) ParameterizedTruncatedNormalAttr { +// value: Error handling policy when there is invalid formatting found in the input. +// The value of 'strict' will cause the operation to produce a InvalidArgument +// error on any invalid input formatting. A value of 'replace' (the default) will +// cause the operation to replace any invalid formatting in the input with the +// `replacement_char` codepoint. A value of 'ignore' will cause the operation to +// skip any invalid formatting in the input and produce no corresponding output +// character. +// If not specified, defaults to "replace" +func UnicodeEncodeErrors(value string) UnicodeEncodeAttr { return func(m optionalAttr) { - m["seed"] = value + m["errors"] = value } } -// ParameterizedTruncatedNormalSeed2 sets the optional seed2 attribute to value. +// UnicodeEncodeReplacementChar sets the optional replacement_char attribute to value. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func ParameterizedTruncatedNormalSeed2(value int64) ParameterizedTruncatedNormalAttr { +// value: The replacement character codepoint to be used in place of any invalid +// formatting in the input when `errors='replace'`. Any valid unicode codepoint may +// be used. The default value is the default unicode replacement character is +// 0xFFFD (U+65533). 
+// If not specified, defaults to 65533 +func UnicodeEncodeReplacementChar(value int64) UnicodeEncodeAttr { return func(m optionalAttr) { - m["seed2"] = value + m["replacement_char"] = value } } -// Outputs random values from a normal distribution. The parameters may each be a +// Encode a tensor of ints into unicode strings. // -// scalar which applies to the entire output, or a vector of length shape[0] which -// stores the parameters for each batch. +// Returns a vector of strings, where `output[i]` is constructed by encoding the +// Unicode codepoints in `input_values[input_splits[i]:input_splits[i+1]]` +// using `output_encoding`. +// +// --- +// +// Example: +// +// ``` +// input_values = [72, 101, 108, 108, 111, 87, 111, 114, 108, 100] +// input_splits = [0, 5, 10] +// output_encoding = 'UTF-8' +// +// output = ['Hello', 'World'] +// ``` // // Arguments: -// shape: The shape of the output tensor. Batches are indexed by the 0th dimension. -// means: The mean parameter of each batch. -// stdevs: The standard deviation parameter of each batch. Must be greater than 0. -// minvals: The minimum cutoff. May be -infinity. -// maxvals: The maximum cutoff. May be +infinity, and must be more than the minval -// for each batch. +// input_values: A 1D tensor containing the unicode codepoints that should be encoded. +// input_splits: A 1D tensor specifying how the unicode codepoints should be split into strings. +// In particular, `output[i]` is constructed by encoding the codepoints in the +// slice `input_values[input_splits[i]:input_splits[i+1]]`. +// output_encoding: Unicode encoding of the output strings. Valid encodings are: `"UTF-8", +// "UTF-16-BE", and "UTF-32-BE"`. // -// Returns A matrix of shape num_batches x samples_per_batch, filled with random -// truncated normal values using the parameters for each row. 
-func ParameterizedTruncatedNormal(scope *Scope, shape tf.Output, means tf.Output, stdevs tf.Output, minvals tf.Output, maxvals tf.Output, optional ...ParameterizedTruncatedNormalAttr) (output tf.Output) { +// Returns The 1-D Tensor of strings encoded from the provided unicode codepoints. +func UnicodeEncode(scope *Scope, input_values tf.Output, input_splits tf.Output, output_encoding string, optional ...UnicodeEncodeAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"output_encoding": output_encoding} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ParameterizedTruncatedNormal", + Type: "UnicodeEncode", Input: []tf.Input{ - shape, means, stdevs, minvals, maxvals, + input_values, input_splits, }, Attrs: attrs, } @@ -29409,154 +29515,110 @@ func ParameterizedTruncatedNormal(scope *Scope, shape tf.Output, means tf.Output return op.Output(0) } -// Sets the index-th position of the list to contain the given tensor. -// -// input_handle: the list -// index: the position in the list to which the tensor will be assigned -// item: the element to be assigned to that position -// output_handle: the new list, with the element in the proper position -// -func TensorListSetItem(scope *Scope, input_handle tf.Output, index tf.Output, item tf.Output) (output_handle tf.Output) { +// Computes the complementary error function of `x` element-wise. +func Erfc(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorListSetItem", + Type: "Erfc", Input: []tf.Input{ - input_handle, index, item, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a Tensor by indexing into the TensorList. -// -// Each row in the produced Tensor corresponds to the element in the TensorList -// specified by the given index (see `tf.gather`). +// Computes sigmoid of `x` element-wise. // -// input_handle: The input tensor list. 
-// indices: The indices used to index into the list. -// values: The tensor. -func TensorListGather(scope *Scope, input_handle tf.Output, indices tf.Output, element_shape tf.Output, element_dtype tf.DataType) (values tf.Output) { +// Specifically, `y = 1 / (1 + exp(-x))`. +func Sigmoid(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"element_dtype": element_dtype} opspec := tf.OpSpec{ - Type: "TensorListGather", + Type: "Sigmoid", Input: []tf.Input{ - input_handle, indices, element_shape, + x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a TensorList by indexing into a Tensor. -// -// Each member of the TensorList corresponds to one row of the input tensor, -// specified by the given index (see `tf.gather`). -// -// tensor: The input tensor. -// indices: The indices used to index into the list. -// element_shape: The shape of the elements in the list (can be less specified than -// the shape of the tensor). -// output_handle: The TensorList. -func TensorListScatter(scope *Scope, tensor tf.Output, indices tf.Output, element_shape tf.Output) (output_handle tf.Output) { +// Computes sin of x element-wise. +func Sin(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorListScatter", + Type: "Sin", Input: []tf.Input{ - tensor, indices, element_shape, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Deprecated. Use TensorArrayScatterV3 +// FusedBatchNormGradAttr is an optional argument to FusedBatchNormGrad. +type FusedBatchNormGradAttr func(optionalAttr) + +// FusedBatchNormGradEpsilon sets the optional epsilon attribute to value. 
// -// DEPRECATED at GraphDef version 26: Use TensorArrayScatterV3 -func TensorArrayScatterV2(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorArrayScatterV2", - Input: []tf.Input{ - handle, indices, value, flow_in, - }, +// value: A small float number added to the variance of x. +// If not specified, defaults to 0.0001 +func FusedBatchNormGradEpsilon(value float32) FusedBatchNormGradAttr { + return func(m optionalAttr) { + m["epsilon"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// AsStringAttr is an optional argument to AsString. -type AsStringAttr func(optionalAttr) - -// AsStringPrecision sets the optional precision attribute to value. +// FusedBatchNormGradDataFormat sets the optional data_format attribute to value. // -// value: The post-decimal precision to use for floating point numbers. -// Only used if precision > -1. -// If not specified, defaults to -1 -func AsStringPrecision(value int64) AsStringAttr { - return func(m optionalAttr) { - m["precision"] = value - } -} - -// AsStringScientific sets the optional scientific attribute to value. -// -// value: Use scientific notation for floating point numbers. -// If not specified, defaults to false -func AsStringScientific(value bool) AsStringAttr { +// value: The data format for y_backprop, x, x_backprop. +// Either "NHWC" (default) or "NCHW". +// If not specified, defaults to "NHWC" +func FusedBatchNormGradDataFormat(value string) FusedBatchNormGradAttr { return func(m optionalAttr) { - m["scientific"] = value + m["data_format"] = value } } -// AsStringShortest sets the optional shortest attribute to value. +// FusedBatchNormGradIsTraining sets the optional is_training attribute to value. // -// value: Use shortest representation (either scientific or standard) for -// floating point numbers. 
-// If not specified, defaults to false -func AsStringShortest(value bool) AsStringAttr { +// value: A bool value to indicate the operation is for training (default) +// or inference. +// If not specified, defaults to true +func FusedBatchNormGradIsTraining(value bool) FusedBatchNormGradAttr { return func(m optionalAttr) { - m["shortest"] = value + m["is_training"] = value } } -// AsStringWidth sets the optional width attribute to value. +// Gradient for batch normalization. // -// value: Pad pre-decimal numbers to this width. -// Applies to both floating point and integer numbers. -// Only used if width > -1. -// If not specified, defaults to -1 -func AsStringWidth(value int64) AsStringAttr { - return func(m optionalAttr) { - m["width"] = value - } -} - -// AsStringFill sets the optional fill attribute to value. +// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". +// The size of 1D Tensors matches the dimension C of the 4D Tensors. // -// value: The value to pad if width > -1. If empty, pads with spaces. -// Another typical value is '0'. String cannot be longer than 1 character. -// If not specified, defaults to "" -func AsStringFill(value string) AsStringAttr { - return func(m optionalAttr) { - m["fill"] = value - } -} - -// Converts each entry in the given tensor to strings. Supports many numeric +// Arguments: +// y_backprop: A 4D Tensor for the gradient with respect to y. +// x: A 4D Tensor for input data. +// scale: A 1D Tensor for scaling factor, to scale the normalized x. +// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch +// mean to be reused in gradient computation. When is_training is +// False, a 1D Tensor for the population mean to be reused in both +// 1st and 2nd order gradient computation. +// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch +// variance (inverted variance in the cuDNN case) to be reused in +// gradient computation. 
When is_training is False, a 1D Tensor +// for the population variance to be reused in both 1st and 2nd +// order gradient computation. // -// types and boolean. -func AsString(scope *Scope, input tf.Output, optional ...AsStringAttr) (output tf.Output) { +// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input +// in FusedBatchNorm. +func FusedBatchNormGrad(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradAttr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) { if scope.Err() != nil { return } @@ -29565,66 +29627,47 @@ func AsString(scope *Scope, input tf.Output, optional ...AsStringAttr) (output t a(attrs) } opspec := tf.OpSpec{ - Type: "AsString", + Type: "FusedBatchNormGrad", Input: []tf.Input{ - input, + y_backprop, x, scale, reserve_space_1, reserve_space_2, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// Returns a `RaggedTensor` containing the specified sequences of numbers. -// -// -// Returns a `RaggedTensor` `result` composed from `rt_dense_values` and -// `rt_nested_splits`, such that -// `result[i] = range(starts[i], limits[i], deltas[i])`. -// -// ```python -// >>> (rt_nested_splits, rt_dense_values) = gen_ragged_ops.ragged_range( -// ... 
starts=[2, 5, 8], limits=[3, 5, 12], deltas=1) -// >>> result = ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits) -// >>> print result.eval().tolist() -// [[2], # result[0] = range(2, 3) -// [], # result[1] = range(5, 5) -// [8, 9, 10, 11]] # result[2] = range(8, 12) -// ``` -// -// The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. -// The vector inputs must all have the same size. Scalar inputs are broadcast -// to match the size of the vector inputs. -// -// Arguments: -// starts: The starts of each range. -// limits: The limits of each range. -// deltas: The deltas of each range. -// -// Returns The `row_splits` for the returned `RaggedTensor`.The `flat_values` for the returned `RaggedTensor`. -func RaggedRange(scope *Scope, starts tf.Output, limits tf.Output, deltas tf.Output) (rt_nested_splits tf.Output, rt_dense_values tf.Output) { +// Computes cos of x element-wise. +func Cos(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "RaggedRange", + Type: "Cos", Input: []tf.Input{ - starts, limits, deltas, + x, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Deprecated, use python implementation tf.linalg.matrix_exponential. +// Computes the determinant of one or more square matrices. // -// DEPRECATED at GraphDef version 27: Use Python implementation tf.linalg.matrix_exponential instead. -func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) { +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. The output is a tensor containing the determinants +// for all input submatrices `[..., :, :]`. +// +// Arguments: +// input: Shape is `[..., M, M]`. +// +// Returns Shape is `[...]`. 
+func MatrixDeterminant(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "MatrixExponential", + Type: "MatrixDeterminant", Input: []tf.Input{ input, }, @@ -29633,145 +29676,145 @@ func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2. -type QueueDequeueUpToV2Attr func(optionalAttr) - -// QueueDequeueUpToV2TimeoutMs sets the optional timeout_ms attribute to value. -// -// value: If the queue has fewer than n elements, this operation -// will block for up to timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueDequeueUpToV2TimeoutMs(value int64) QueueDequeueUpToV2Attr { - return func(m optionalAttr) { - m["timeout_ms"] = value - } -} - -// Dequeues `n` tuples of one or more tensors from the given queue. -// -// This operation is not supported by all queues. If a queue does not support -// DequeueUpTo, then an Unimplemented error is returned. -// -// If the queue is closed and there are more than 0 but less than `n` -// elements remaining, then instead of returning an OutOfRange error like -// QueueDequeueMany, less than `n` elements are returned immediately. If -// the queue is closed and there are 0 elements left in the queue, then -// an OutOfRange error is returned just like in QueueDequeueMany. -// Otherwise the behavior is identical to QueueDequeueMany: -// -// This operation concatenates queue-element component tensors along the -// 0th dimension to make a single component tensor. All of the components -// in the dequeued tuple will have size n in the 0th dimension. 
+// Updates the tree ensemble by either adding a layer to the last tree being grown // -// This operation has `k` outputs, where `k` is the number of components in -// the tuples stored in the given queue, and output `i` is the ith -// component of the dequeued tuple. +// or by starting a new tree. // // Arguments: -// handle: The handle to a queue. -// n: The number of tuples to dequeue. -// component_types: The type of each component in a tuple. +// tree_ensemble_handle: Handle to the ensemble variable. +// feature_ids: Rank 1 tensor with ids for each feature. This is the real id of +// the feature that will be used in the split. +// node_ids: List of rank 1 tensors representing the nodes for which this feature +// has a split. +// gains: List of rank 1 tensors representing the gains for each of the feature's +// split. +// thresholds: List of rank 1 tensors representing the thresholds for each of the +// feature's split. +// left_node_contribs: List of rank 2 tensors with left leaf contribs for each of +// the feature's splits. Will be added to the previous node values to constitute +// the values of the left nodes. +// right_node_contribs: List of rank 2 tensors with right leaf contribs for each +// of the feature's splits. Will be added to the previous node values to constitute +// the values of the right nodes. +// max_depth: Max depth of the tree to build. +// learning_rate: shrinkage const for each new tree. +// pruning_mode: 0-No pruning, 1-Pre-pruning, 2-Post-pruning. // -// Returns One or more tensors that were dequeued as a tuple. -func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueUpToV2Attr) (components []tf.Output) { +// Returns the created operation.
+func BoostedTreesUpdateEnsemble(scope *Scope, tree_ensemble_handle tf.Output, feature_ids tf.Output, node_ids []tf.Output, gains []tf.Output, thresholds []tf.Output, left_node_contribs []tf.Output, right_node_contribs []tf.Output, max_depth tf.Output, learning_rate tf.Output, pruning_mode int64) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"pruning_mode": pruning_mode} opspec := tf.OpSpec{ - Type: "QueueDequeueUpToV2", + Type: "BoostedTreesUpdateEnsemble", Input: []tf.Input{ - handle, n, + tree_ensemble_handle, feature_ids, tf.OutputList(node_ids), tf.OutputList(gains), tf.OutputList(thresholds), tf.OutputList(left_node_contribs), tf.OutputList(right_node_contribs), max_depth, learning_rate, }, Attrs: attrs, } + return scope.AddOperation(opspec) +} + +// Computes tan of x element-wise. +func Tan(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Tan", + Input: []tf.Input{ + x, + }, + } op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that emits each dim-0 slice of `components` once. +func TensorSliceDataset(scope *Scope, components []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueUpToV2", err) + attrs := map[string]interface{}{"output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "TensorSliceDataset", + Input: []tf.Input{ + tf.OutputList(components), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes acos of x element-wise. 
+func Acos(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { return } - return components + opspec := tf.OpSpec{ + Type: "Acos", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the Cholesky decomposition of one or more square matrices. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. -// -// The input has to be symmetric and positive definite. Only the lower-triangular -// part of the input will be used for this operation. The upper-triangular part -// will not be read. -// -// The output is a tensor of the same shape as the input -// containing the Cholesky decompositions for all input submatrices `[..., :, :]`. -// -// **Note**: The gradient computation on GPU is faster for large matrices but -// not for large batch dimensions when the submatrices are small. In this -// case it might be faster to use the CPU. +// Computes the Bessel i0e function of `x` element-wise. // -// Arguments: -// input: Shape is `[..., M, M]`. +// Exponentially scaled modified Bessel function of order 0 defined as +// `bessel_i0e(x) = exp(-abs(x)) bessel_i0(x)`. // -// Returns Shape is `[..., M, M]`. -func Cholesky(scope *Scope, input tf.Output) (output tf.Output) { +// This function is faster and numerically stabler than `bessel_i0(x)`. +func BesselI0e(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Cholesky", + Type: "BesselI0e", Input: []tf.Input{ - input, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Writes contents to the file at input filename. Creates file and recursively -// -// creates directory if not existing. -// -// Arguments: -// filename: scalar. The name of the file to which we write the contents. -// contents: scalar. The content to be written to the output file. +// Shuffle dimensions of x according to a permutation. // -// Returns the created operation. 
-func WriteFile(scope *Scope, filename tf.Output, contents tf.Output) (o *tf.Operation) { +// The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy: +// `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]` +func Transpose(scope *Scope, x tf.Output, perm tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "WriteFile", + Type: "Transpose", Input: []tf.Input{ - filename, contents, + x, perm, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// AllAttr is an optional argument to All. -type AllAttr func(optionalAttr) +// MinAttr is an optional argument to Min. +type MinAttr func(optionalAttr) -// AllKeepDims sets the optional keep_dims attribute to value. +// MinKeepDims sets the optional keep_dims attribute to value. // // value: If true, retain reduced dimensions with length 1. // If not specified, defaults to false -func AllKeepDims(value bool) AllAttr { +func MinKeepDims(value bool) MinAttr { return func(m optionalAttr) { m["keep_dims"] = value } } -// Computes the "logical and" of elements across dimensions of a tensor. +// Computes the minimum of elements across dimensions of a tensor. // // Reduces `input` along the dimensions given in `axis`. Unless // `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in @@ -29784,7 +29827,7 @@ func AllKeepDims(value bool) AllAttr { // `[-rank(input), rank(input))`. // // Returns The reduced tensor. 
-func All(scope *Scope, input tf.Output, axis tf.Output, optional ...AllAttr) (output tf.Output) { +func Min(scope *Scope, input tf.Output, axis tf.Output, optional ...MinAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -29793,7 +29836,7 @@ func All(scope *Scope, input tf.Output, axis tf.Output, optional ...AllAttr) (ou a(attrs) } opspec := tf.OpSpec{ - Type: "All", + Type: "Min", Input: []tf.Input{ input, axis, }, @@ -29803,403 +29846,388 @@ func All(scope *Scope, input tf.Output, axis tf.Output, optional ...AllAttr) (ou return op.Output(0) } -// Computes the Eigen Decomposition of a batch of square self-adjoint matrices. -// -// DEPRECATED at GraphDef version 11: Use SelfAdjointEigV2 instead. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices, with the same constraints as the single matrix -// SelfAdjointEig. -// -// The result is a [..., M+1, M] matrix with [..., 0,:] containing the -// eigenvalues, and subsequent [...,1:, :] containing the eigenvectors. The eigenvalues -// are sorted in non-decreasing order. +// Computes the Bessel i1e function of `x` element-wise. // -// Arguments: -// input: Shape is `[..., M, M]`. +// Exponentially scaled modified Bessel function of order 1 defined as +// `bessel_i1e(x) = exp(-abs(x)) bessel_i1(x)`. // -// Returns Shape is `[..., M+1, M]`. -func SelfAdjointEig(scope *Scope, input tf.Output) (output tf.Output) { +// This function is faster and numerically stabler than `bessel_i1(x)`. +func BesselI1e(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SelfAdjointEig", + Type: "BesselI1e", Input: []tf.Input{ - input, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Solves tridiagonal systems of equations.
-// -// `diagonals` is a tensor of shape `[..., 3, M]` whose inner-most 2 dimensions -// represent matrices with three rows being the superdiagonal, diagonals, and -// subdiagonals, in order. The last element of the superdiagonal and the first -// element of the subdiagonal is ignored. -// `rhs` is a tensor of shape `[..., M, K]`, representing K right-hand sides per -// each left-hand side. -// The output is a tensor of shape `[..., M, K]` containing the solutions. +// Returns an element-wise indication of the sign of a number. // -// Arguments: -// diagonals: Shape is `[..., 3, M]`. -// rhs: Shape is `[..., M, K]`. +// `y = sign(x) = -1` if `x < 0`; 0 if `x == 0`; 1 if `x > 0`. // -// Returns Shape is `[..., M, K]`. -func TridiagonalSolve(scope *Scope, diagonals tf.Output, rhs tf.Output) (output tf.Output) { +// For complex numbers, `y = sign(x) = x / |x|` if `x != 0`, otherwise `y = 0`. +func Sign(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TridiagonalSolve", + Type: "Sign", Input: []tf.Input{ - diagonals, rhs, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes softplus gradients for a softplus operation. +// Creates a dataset that passes a sliding window over `input_dataset`. // // Arguments: -// gradients: The backpropagated gradients to the corresponding softplus operation. -// features: The features passed as input to the corresponding softplus operation. // -// Returns The gradients: `gradients / (1 + exp(-features))`. -func SoftplusGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { +// window_size: A scalar representing the number of elements in the +// sliding window. +// window_shift: A scalar representing the steps moving the sliding window +// forward in one iteration. It must be positive. +// window_stride: A scalar representing the stride of the input elements of the sliding window. +// It must be positive. 
+// +// +func ExperimentalSlidingWindowDataset(scope *Scope, input_dataset tf.Output, window_size tf.Output, window_shift tf.Output, window_stride tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "SoftplusGrad", + Type: "ExperimentalSlidingWindowDataset", Input: []tf.Input{ - gradients, features, + input_dataset, window_size, window_shift, window_stride, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2. -type SelfAdjointEigV2Attr func(optionalAttr) +// OrderedMapUnstageNoKeyAttr is an optional argument to OrderedMapUnstageNoKey. +type OrderedMapUnstageNoKeyAttr func(optionalAttr) -// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value. +// OrderedMapUnstageNoKeyCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: If `True` then eigenvectors will be computed and returned in `v`. -// Otherwise, only the eigenvalues will be computed. -// If not specified, defaults to true -func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr { +// REQUIRES: value >= 0 +func OrderedMapUnstageNoKeyCapacity(value int64) OrderedMapUnstageNoKeyAttr { return func(m optionalAttr) { - m["compute_v"] = value + m["capacity"] = value } } -// Computes the eigen decomposition of one or more square self-adjoint matrices. -// -// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in -// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. The eigenvalues -// are sorted in non-decreasing order. -// -// ```python -// # a is a tensor. -// # e is a tensor of eigenvalues. -// # v is a tensor of eigenvectors. 
-// e, v = self_adjoint_eig(a) -// e = self_adjoint_eig(a, compute_v=False) -// ``` +// OrderedMapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Arguments: -// input: `Tensor` input of shape `[N, N]`. +// REQUIRES: value >= 0 +func OrderedMapUnstageNoKeyMemoryLimit(value int64) OrderedMapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapUnstageNoKeyContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func OrderedMapUnstageNoKeyContainer(value string) OrderedMapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapUnstageNoKeySharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func OrderedMapUnstageNoKeySharedName(value string) OrderedMapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes and returns the (key, value) element with the smallest // -// Returns Eigenvalues. Shape is `[N]`.Eigenvectors. Shape is `[N, N]`. -func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) { +// key from the underlying container. If the underlying container +// does not contain elements, the op will block until it does. 
+func OrderedMapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...OrderedMapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "SelfAdjointEigV2", + Type: "OrderedMapUnstageNoKey", Input: []tf.Input{ - input, + indices, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + if scope.Err() != nil { + return + } + var idx int + var err error + key = op.Output(idx) + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("OrderedMapUnstageNoKey", err) + return + } + return key, values } -// Adjust the saturation of one or more images. -// -// `images` is a tensor of at least 3 dimensions. The last dimension is -// interpretted as channels, and must be three. -// -// The input image is considered in the RGB colorspace. Conceptually, the RGB -// colors are first mapped into HSV. A scale is then applied all the saturation -// values, and then remapped back to RGB colorspace. +// Returns element-wise integer closest to x. // -// Arguments: -// images: Images to adjust. At least 3-D. -// scale: A float scale to add to the saturation. +// If the result is midway between two representable values, +// the even representable is chosen. +// For example: // -// Returns The hue-adjusted image or images. -func AdjustSaturation(scope *Scope, images tf.Output, scale tf.Output) (output tf.Output) { +// ``` +// rint(-1.5) ==> -2.0 +// rint(0.5000001) ==> 1.0 +// rint([-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]) ==> [-2., -2., -0., 0., 2., 2., 2.] 
+// ``` +func Rint(scope *Scope, x tf.Output) (y tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "AdjustSaturation", + Type: "Rint", Input: []tf.Input{ - images, scale, + x, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// MatrixSolveAttr is an optional argument to MatrixSolve. -type MatrixSolveAttr func(optionalAttr) - -// MatrixSolveAdjoint sets the optional adjoint attribute to value. -// -// value: Boolean indicating whether to solve with `matrix` or its (block-wise) -// adjoint. -// If not specified, defaults to false -func MatrixSolveAdjoint(value bool) MatrixSolveAttr { - return func(m optionalAttr) { - m["adjoint"] = value +// Computes the derivative of a Gamma random sample w.r.t. `alpha`. +func RandomGammaGrad(scope *Scope, alpha tf.Output, sample tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "RandomGammaGrad", + Input: []tf.Input{ + alpha, sample, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Solves systems of linear equations. -// -// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. `Rhs` is a tensor of shape `[..., M, K]`. The `output` is -// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix -// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. -// If `adjoint` is `True` then each output matrix satisfies -// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. +// Returns x + y element-wise. // -// Arguments: -// matrix: Shape is `[..., M, M]`. -// rhs: Shape is `[..., M, K]`. -// -// Returns Shape is `[..., M, K]`. -func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { +// *NOTE*: `Add` supports broadcasting. `AddN` does not. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) + opspec := tf.OpSpec{ + Type: "Add", + Input: []tf.Input{ + x, y, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns x + y element-wise. +// +// *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func AddV2(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return } opspec := tf.OpSpec{ - Type: "MatrixSolve", + Type: "AddV2", Input: []tf.Input{ - matrix, rhs, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyKerasMomentumAttr is an optional argument to ResourceApplyKerasMomentum. -type ResourceApplyKerasMomentumAttr func(optionalAttr) +// AllCandidateSamplerAttr is an optional argument to AllCandidateSampler. +type AllCandidateSamplerAttr func(optionalAttr) -// ResourceApplyKerasMomentumUseLocking sets the optional use_locking attribute to value. +// AllCandidateSamplerSeed sets the optional seed attribute to value. // -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyKerasMomentumUseLocking(value bool) ResourceApplyKerasMomentumAttr { +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. 
+// If not specified, defaults to 0 +func AllCandidateSamplerSeed(value int64) AllCandidateSamplerAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["seed"] = value } } -// ResourceApplyKerasMomentumUseNesterov sets the optional use_nesterov attribute to value. +// AllCandidateSamplerSeed2 sets the optional seed2 attribute to value. // -// value: If `True`, the tensor passed to compute grad will be -// var + momentum * accum, so in the end, the var you get is actually -// var + momentum * accum. -// If not specified, defaults to false -func ResourceApplyKerasMomentumUseNesterov(value bool) ResourceApplyKerasMomentumAttr { +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func AllCandidateSamplerSeed2(value int64) AllCandidateSamplerAttr { return func(m optionalAttr) { - m["use_nesterov"] = value + m["seed2"] = value } } -// Update '*var' according to the momentum scheme. Set use_nesterov = True if you +// Generates labels for candidate sampling with a learned unigram distribution. // -// want to use Nesterov momentum. +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. // -// accum = accum * momentum - lr * grad -// var += accum +// For each batch, this op picks a single set of sampled candidate labels. +// +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. // // Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// momentum: Momentum. Must be a scalar. +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context.
+// num_sampled: Number of candidates to produce. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. // -// Returns the created operation. -func ResourceApplyKerasMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyKerasMomentumAttr) (o *tf.Operation) { +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate. A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. If unique=true, then this is a probability. A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. +func AllCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, optional ...AllCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceApplyKerasMomentum", + Type: "AllCandidateSampler", Input: []tf.Input{ - var_, accum, lr, grad, momentum, + true_classes, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// Returns a serialized GraphDef representing `input_dataset`. -// -// Returns a graph representation for `input_dataset`. +// Returns element-wise remainder of division.
When `x < 0` xor `y < 0` is // -// Arguments: -// input_dataset: A variant tensor representing the dataset to return the graph representation for. +// true, this follows Python semantics in that the result here is consistent +// with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`. // -// Returns The graph representation of the dataset (as serialized GraphDef). -func DatasetToGraph(scope *Scope, input_dataset tf.Output) (graph tf.Output) { +// *NOTE*: `FloorMod` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func FloorMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "DatasetToGraph", + Type: "FloorMod", Input: []tf.Input{ - input_dataset, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// LuAttr is an optional argument to Lu. -type LuAttr func(optionalAttr) - -// LuOutputIdxType sets the optional output_idx_type attribute to value. -// If not specified, defaults to DT_INT32 -func LuOutputIdxType(value tf.DataType) LuAttr { - return func(m optionalAttr) { - m["output_idx_type"] = value - } -} - -// Computes the LU decomposition of one or more square matrices. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. -// -// The input has to be invertible. -// -// The output consists of two tensors LU and P containing the LU decomposition -// of all input submatrices `[..., :, :]`. LU encodes the lower triangular and -// upper triangular factors. +// Saves the input tensors to disk. // -// For each input submatrix of shape `[M, M]`, L is a lower triangular matrix of -// shape `[M, M]` with unit diagonal whose entries correspond to the strictly lower -// triangular part of LU. U is a upper triangular matrix of shape `[M, M]` whose -// entries correspond to the upper triangular part, including the diagonal, of LU. 
+// The size of `tensor_names` must match the number of tensors in `data`. `data[i]` +// is written to `filename` with name `tensor_names[i]`. // -// P represents a permutation matrix encoded as a list of indices each between `0` -// and `M-1`, inclusive. If P_mat denotes the permutation matrix corresponding to -// P, then the L, U and P satisfies P_mat * input = L * U. +// See also `SaveSlices`. // // Arguments: -// input: A tensor of shape `[..., M, M]` whose inner-most 2 dimensions form matrices of -// size `[M, M]`. +// filename: Must have a single element. The name of the file to which we write +// the tensor. +// tensor_names: Shape `[N]`. The names of the tensors to be saved. +// data: `N` tensors to save. // -// Returns A tensor of shape `[..., M, M]` whose strictly lower triangular part denotes the -// lower triangular factor `L` with unit diagonal, and whose upper triangular part -// denotes the upper triangular factor `U`.Permutation of the rows encoded as a list of indices in `0..M-1`. Shape is -// `[..., M]`. -// @compatibility(scipy) -// Similar to `scipy.linalg.lu`, except the triangular factors `L` and `U` are -// packed into a single tensor, the permutation is applied to `input` instead of -// the right hand side and the permutation `P` is returned as a list of indices -// instead of a permutation matrix. -// @end_compatibility -func Lu(scope *Scope, input tf.Output, optional ...LuAttr) (lu tf.Output, p tf.Output) { +// Returns the created operation. +func Save(scope *Scope, filename tf.Output, tensor_names tf.Output, data []tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Lu", + Type: "Save", Input: []tf.Input{ - input, + filename, tensor_names, tf.OutputList(data), }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return scope.AddOperation(opspec) } -// Deprecated. 
Use TensorArrayCloseV3 +// Returns x / y element-wise for integer types. // -// DEPRECATED at GraphDef version 26: Use TensorArrayCloseV3 +// Truncation designates that negative numbers will round fractional quantities +// toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different +// than Python semantics. See `FloorDiv` for a division function that matches +// Python Semantics. // -// Returns the created operation. -func TensorArrayCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { +// *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func TruncateDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorArrayCloseV2", + Type: "TruncateDiv", Input: []tf.Input{ - handle, + x, y, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// EncodeBase64Attr is an optional argument to EncodeBase64. -type EncodeBase64Attr func(optionalAttr) +// RequantizePerChannelAttr is an optional argument to RequantizePerChannel. +type RequantizePerChannelAttr func(optionalAttr) -// EncodeBase64Pad sets the optional pad attribute to value. +// RequantizePerChannelOutType sets the optional out_type attribute to value. // -// value: Bool whether padding is applied at the ends. -// If not specified, defaults to false -func EncodeBase64Pad(value bool) EncodeBase64Attr { +// value: The quantized type of output tensor that needs to be converted. +// If not specified, defaults to DT_QUINT8 +func RequantizePerChannelOutType(value tf.DataType) RequantizePerChannelAttr { return func(m optionalAttr) { - m["pad"] = value + m["out_type"] = value } } -// Encode strings into web-safe base64 format. -// -// Refer to the following article for more information on base64 format: -// en.wikipedia.org/wiki/Base64. 
Base64 strings may have padding with '=' at the -// end so that the encoded has length multiple of 4. See Padding section of the -// link above. -// -// Web-safe means that the encoder uses - and _ instead of + and /. +// Requantizes input with min and max values known per channel. // // Arguments: -// input: Strings to be encoded. +// input: The original input tensor. +// input_min: The minimum value of the input tensor. +// input_max: The maximum value of the input tensor. +// requested_output_min: The minimum value of the output tensor requested. +// requested_output_max: The maximum value of the output tensor requested. // -// Returns Input strings encoded in base64. -func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) (output tf.Output) { +// Returns Output tensor. The minimum value of the final output tensor. The maximum value of the final output tensor. +func RequantizePerChannel(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, optional ...RequantizePerChannelAttr) (output tf.Output, output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } @@ -30208,538 +30236,376 @@ func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) ( a(attrs) } opspec := tf.OpSpec{ - Type: "EncodeBase64", + Type: "RequantizePerChannel", Input: []tf.Input{ - input, + input, input_min, input_max, requested_output_min, requested_output_max, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// A dataset that creates window datasets from the input dataset. +// Restores tensors from a V2 checkpoint.
// -// Arguments: +// For backward compatibility with the V1 format, this Op currently allows +// restoring from a V1 checkpoint as well: +// - This Op first attempts to find the V2 index file pointed to by "prefix", and +// if found proceed to read it as a V2 checkpoint; +// - Otherwise the V1 read path is invoked. +// Relying on this behavior is not recommended, as the ability to fall back to read +// V1 might be deprecated and eventually removed. // -// size: A scalar representing the number of elements to accumulate in a window. -// shift: A scalar representing the steps moving the sliding window forward in one -// iteration. It must be positive. -// stride: A scalar representing the stride of the input elements of the sliding window. -// It must be positive. -// drop_remainder: A scalar representing whether a window should be dropped in case its size is -// smaller than desired. +// By default, restores the named tensors in full. If the caller wishes to restore +// specific slices of stored tensors, "shape_and_slices" should be non-empty +// strings and correspondingly well-formed. // +// Callers must ensure all the named tensors are indeed stored in the checkpoint. // -func WindowDataset(scope *Scope, input_dataset tf.Output, size tf.Output, shift tf.Output, stride tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Arguments: +// prefix: Must have a single element. The prefix of a V2 checkpoint. +// tensor_names: shape {N}. The names of the tensors to be restored. +// shape_and_slices: shape {N}. The slice specs of the tensors to be restored. +// Empty strings indicate that they are non-partitioned tensors. +// dtypes: shape {N}. The list of expected dtype for the tensors. Must match +// those stored in the checkpoint. +// +// Returns shape {N}. The restored tensors, whose shapes are read from the +// checkpoint directly. 
+func RestoreV2(scope *Scope, prefix tf.Output, tensor_names tf.Output, shape_and_slices tf.Output, dtypes []tf.DataType) (tensors []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"dtypes": dtypes} opspec := tf.OpSpec{ - Type: "WindowDataset", + Type: "RestoreV2", Input: []tf.Input{ - input_dataset, size, shift, stride, drop_remainder, + prefix, tensor_names, shape_and_slices, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the matrix square root of one or more square matrices: -// -// matmul(sqrtm(A), sqrtm(A)) = A -// -// The input matrix should be invertible. If the input matrix is real, it should -// have no eigenvalues which are real and negative (pairs of complex conjugate -// eigenvalues are allowed). -// -// The matrix square root is computed by first reducing the matrix to -// quasi-triangular form with the real Schur decomposition. The square root -// of the quasi-triangular matrix is then computed directly. Details of -// the algorithm can be found in: Nicholas J. Higham, "Computing real -// square roots of a real matrix", Linear Algebra Appl., 1987. -// -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor of the same shape as the input -// containing the matrix square root for all input submatrices `[..., :, :]`. -// -// Arguments: -// input: Shape is `[..., M, M]`. -// -// Returns Shape is `[..., M, M]`. 
-// -// @compatibility(scipy) -// Equivalent to scipy.linalg.sqrtm -// @end_compatibility -func MatrixSquareRoot(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "MatrixSquareRoot", - Input: []tf.Input{ - input, - }, + var idx int + var err error + if tensors, idx, err = makeOutputList(op, idx, "tensors"); err != nil { + scope.UpdateErr("RestoreV2", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + return tensors } -// SvdAttr is an optional argument to Svd. -type SvdAttr func(optionalAttr) +// FIFOQueueV2Attr is an optional argument to FIFOQueueV2. +type FIFOQueueV2Attr func(optionalAttr) -// SvdComputeUv sets the optional compute_uv attribute to value. +// FIFOQueueV2Shapes sets the optional shapes attribute to value. // -// value: If true, left and right singular vectors will be -// computed and returned in `u` and `v`, respectively. -// If false, `u` and `v` are not set and should never referenced. -// If not specified, defaults to true -func SvdComputeUv(value bool) SvdAttr { +// value: The shape of each component in a value. The length of this attr must +// be either 0 or the same as the length of component_types. If the length of +// this attr is 0, the shapes of queue elements are not constrained, and +// only one element may be dequeued at a time. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr { return func(m optionalAttr) { - m["compute_uv"] = value + m["shapes"] = value } } -// SvdFullMatrices sets the optional full_matrices attribute to value. +// FIFOQueueV2Capacity sets the optional capacity attribute to value. // -// value: If true, compute full-sized `u` and `v`. If false -// (the default), compute only the leading `P` singular vectors. -// Ignored if `compute_uv` is `False`. 
-// If not specified, defaults to false -func SvdFullMatrices(value bool) SvdAttr { +// value: The upper bound on the number of elements in this queue. +// Negative numbers mean no limit. +// If not specified, defaults to -1 +func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr { return func(m optionalAttr) { - m["full_matrices"] = value + m["capacity"] = value } } -// Computes the singular value decompositions of one or more matrices. +// FIFOQueueV2Container sets the optional container attribute to value. // -// Computes the SVD of each inner matrix in `input` such that -// `input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])` +// value: If non-empty, this queue is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func FIFOQueueV2Container(value string) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// FIFOQueueV2SharedName sets the optional shared_name attribute to value. // -// ```python -// # a is a tensor containing a batch of matrices. -// # s is a tensor of singular values for each matrix. -// # u is the tensor containing of left singular vectors for each matrix. -// # v is the tensor containing of right singular vectors for each matrix. -// s, u, v = svd(a) -// s, _, _ = svd(a, compute_uv=False) -// ``` +// value: If non-empty, this queue will be shared under the given name +// across multiple sessions. +// If not specified, defaults to "" +func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A queue that produces elements in first-in first-out order. // // Arguments: -// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions -// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. +// component_types: The type of each component in a value. // -// Returns Singular values. Shape is `[..., P]`.Left singular vectors. 
If `full_matrices` is `False` then shape is -// `[..., M, P]`; if `full_matrices` is `True` then shape is -// `[..., M, M]`. Undefined if `compute_uv` is `False`.Left singular vectors. If `full_matrices` is `False` then shape is -// `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`. -// Undefined if `compute_uv` is false. -func Svd(scope *Scope, input tf.Output, optional ...SvdAttr) (s tf.Output, u tf.Output, v tf.Output) { +// Returns The handle to the queue. +func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"component_types": component_types} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Svd", - Input: []tf.Input{ - input, - }, + Type: "FIFOQueueV2", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Converts one or more images from RGB to HSV. -// -// Outputs a tensor of the same shape as the `images` tensor, containing the HSV -// value of the pixels. The output is only well defined if the value in `images` -// are in `[0,1]`. -// -// `output[..., 0]` contains hue, `output[..., 1]` contains saturation, and -// `output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0 -// corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue. -// -// Arguments: -// images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3. -// -// Returns `images` converted to HSV. -func RGBToHSV(scope *Scope, images tf.Output) (output tf.Output) { +// Creates a dataset that contains the elements of `input_dataset` ignoring errors. 
+func ExperimentalIgnoreErrorsDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "RGBToHSV", + Type: "ExperimentalIgnoreErrorsDataset", Input: []tf.Input{ - images, + input_dataset, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Does nothing. Only useful as a placeholder for control edges. -// -// Returns the created operation. -func NoOp(scope *Scope) (o *tf.Operation) { +// Returns 0 if x == 0, and x / y otherwise, elementwise. +func Xdivy(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "NoOp", - } - return scope.AddOperation(opspec) -} - -// MergeV2CheckpointsAttr is an optional argument to MergeV2Checkpoints. -type MergeV2CheckpointsAttr func(optionalAttr) - -// MergeV2CheckpointsDeleteOldDirs sets the optional delete_old_dirs attribute to value. -// -// value: see above. -// If not specified, defaults to true -func MergeV2CheckpointsDeleteOldDirs(value bool) MergeV2CheckpointsAttr { - return func(m optionalAttr) { - m["delete_old_dirs"] = value + Type: "Xdivy", + Input: []tf.Input{ + x, y, + }, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// V2 format specific: merges the metadata files of sharded checkpoints. The -// -// result is one logical checkpoint, with one physical metadata file and renamed -// data files. +// Bucketizes 'input' based on 'boundaries'. // -// Intended for "grouping" multiple checkpoints in a sharded checkpoint setup. +// For example, if the inputs are +// boundaries = [0, 10, 100] +// input = [[-5, 10000] +// [150, 10] +// [5, 100]] // -// If delete_old_dirs is true, attempts to delete recursively the dirname of each -// path in the input checkpoint_prefixes. 
This is useful when those paths are non -// user-facing temporary locations. +// then the output will be +// output = [[0, 3] +// [3, 2] +// [1, 3]] // // Arguments: -// checkpoint_prefixes: prefixes of V2 checkpoints to merge. -// destination_prefix: scalar. The desired final prefix. Allowed to be the same -// as one of the checkpoint_prefixes. +// input: Any shape of Tensor contains with int or float type. +// boundaries: A sorted list of floats gives the boundary of the buckets. // -// Returns the created operation. -func MergeV2Checkpoints(scope *Scope, checkpoint_prefixes tf.Output, destination_prefix tf.Output, optional ...MergeV2CheckpointsAttr) (o *tf.Operation) { +// Returns Same shape with 'input', each value of input replaced with bucket index. +// +// @compatibility(numpy) +// Equivalent to np.digitize. +// @end_compatibility +func Bucketize(scope *Scope, input tf.Output, boundaries []float32) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"boundaries": boundaries} opspec := tf.OpSpec{ - Type: "MergeV2Checkpoints", + Type: "Bucketize", Input: []tf.Input{ - checkpoint_prefixes, destination_prefix, + input, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Saves input tensors slices to disk. +// Calculates gains for each feature and returns the best possible split information for the feature. // -// This is like `Save` except that tensors can be listed in the saved file as being -// a slice of a larger tensor. `shapes_and_slices` specifies the shape of the -// larger tensor and the slice that this tensor covers. `shapes_and_slices` must -// have as many elements as `tensor_names`. -// -// Elements of the `shapes_and_slices` input must either be: -// -// * The empty string, in which case the corresponding tensor is -// saved normally. 
-// * A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the -// `dimI` are the dimensions of the larger tensor and `slice-spec` -// specifies what part is covered by the tensor to save. +// The split information is the best threshold (bucket id), gains and left/right node contributions per node for each feature. // -// `slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1` -// where each `sliceI` is either: +// It is possible that not all nodes can be split on each feature. Hence, the list of possible nodes can differ between the features. Therefore, we return `node_ids_list` for each feature, containing the list of nodes that this feature can be used to split. // -// * The string `-` meaning that the slice covers all indices of this dimension -// * `start,length` where `start` and `length` are integers. In that -// case the slice covers `length` indices starting at `start`. +// In this manner, the output is the best split per features and per node, so that it needs to be combined later to produce the best split for each node (among all possible features). // -// See also `Save`. +// The length of output lists are all of the same length, `num_features`. +// The output shapes are compatible in a way that the first dimension of all tensors of all lists are the same and equal to the number of possible split nodes for each feature. // // Arguments: -// filename: Must have a single element. The name of the file to which we write the -// tensor. -// tensor_names: Shape `[N]`. The names of the tensors to be saved. -// shapes_and_slices: Shape `[N]`. The shapes and slice specifications to use when -// saving the tensors. -// data: `N` tensors to save. +// node_id_range: A Rank 1 tensor (shape=[2]) to specify the range [first, last) of node ids to process within `stats_summary_list`. 
The nodes are iterated between the two nodes specified by the tensor, as like `for node_id in range(node_id_range[0], node_id_range[1])` (Note that the last index node_id_range[1] is exclusive). +// stats_summary_list: A list of Rank 3 tensor (#shape=[max_splits, bucket, 2]) for accumulated stats summary (gradient/hessian) per node per buckets for each feature. The first dimension of the tensor is the maximum number of splits, and thus not all elements of it will be used, but only the indexes specified by node_ids will be used. +// l1: l1 regularization factor on leaf weights, per instance based. +// l2: l2 regularization factor on leaf weights, per instance based. +// tree_complexity: adjustment to the gain, per leaf based. +// min_node_weight: mininum avg of hessians in a node before required for the node to be considered for splitting. +// max_splits: the number of nodes that can be split in the whole tree. Used as a dimension of output tensors. // -// Returns the created operation. -func SaveSlices(scope *Scope, filename tf.Output, tensor_names tf.Output, shapes_and_slices tf.Output, data []tf.Output) (o *tf.Operation) { +// Returns An output list of Rank 1 tensors indicating possible split node ids for each feature. The length of the list is num_features, but each tensor has different size as each feature provides different possible nodes. See above for details like shapes and sizes.An output list of Rank 1 tensors indicating the best gains for each feature to split for certain nodes. See above for details like shapes and sizes.An output list of Rank 1 tensors indicating the bucket id to compare with (as a threshold) for split in each node. See above for details like shapes and sizes.A list of Rank 2 tensors indicating the contribution of the left nodes when branching from parent nodes (given by the tensor element in the output node_ids_list) to the left direction by the given threshold for each feature. 
This value will be used to make the left node value by adding to the parent node value. Second dimension size is 1 for 1-dimensional logits, but would be larger for multi-class problems. See above for details like shapes and sizes.A list of Rank 2 tensors, with the same shape/conditions as left_node_contribs_list, but just that the value is for the right node. +func BoostedTreesCalculateBestGainsPerFeature(scope *Scope, node_id_range tf.Output, stats_summary_list []tf.Output, l1 tf.Output, l2 tf.Output, tree_complexity tf.Output, min_node_weight tf.Output, max_splits int64) (node_ids_list []tf.Output, gains_list []tf.Output, thresholds_list []tf.Output, left_node_contribs_list []tf.Output, right_node_contribs_list []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"max_splits": max_splits} opspec := tf.OpSpec{ - Type: "SaveSlices", + Type: "BoostedTreesCalculateBestGainsPerFeature", Input: []tf.Input{ - filename, tensor_names, shapes_and_slices, tf.OutputList(data), + node_id_range, tf.OutputList(stats_summary_list), l1, l2, tree_complexity, min_node_weight, }, + Attrs: attrs, } - return scope.AddOperation(opspec) -} - -// DenseToDenseSetOperationAttr is an optional argument to DenseToDenseSetOperation. -type DenseToDenseSetOperationAttr func(optionalAttr) - -// DenseToDenseSetOperationValidateIndices sets the optional validate_indices attribute to value. -// If not specified, defaults to true -func DenseToDenseSetOperationValidateIndices(value bool) DenseToDenseSetOperationAttr { - return func(m optionalAttr) { - m["validate_indices"] = value - } -} - -// Applies set operation along last dimension of 2 `Tensor` inputs. -// -// See SetOperationOp::SetOperationFromContext for values of `set_operation`. -// -// Output `result` is a `SparseTensor` represented by `result_indices`, -// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this -// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. 
The `nth` -// dimension contains the result of `set_operation` applied to the corresponding -// `[0...n-1]` dimension of `set`. -// -// Arguments: -// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. -// Dimension `n` contains values in a set, duplicates are allowed but ignored. -// set2: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set1`. -// Dimension `n` contains values in a set, duplicates are allowed but ignored. -// -// -// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is -// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` -// is the max result set size across all `0...n-1` dimensions. -func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_operation string, optional ...DenseToDenseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { + op := scope.AddOperation(opspec) if scope.Err() != nil { return } - attrs := map[string]interface{}{"set_operation": set_operation} - for _, a := range optional { - a(attrs) + var idx int + var err error + if node_ids_list, idx, err = makeOutputList(op, idx, "node_ids_list"); err != nil { + scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) + return } - opspec := tf.OpSpec{ - Type: "DenseToDenseSetOperation", - Input: []tf.Input{ - set1, set2, - }, - Attrs: attrs, + if gains_list, idx, err = makeOutputList(op, idx, "gains_list"); err != nil { + scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - -// Generate a sharded filename. The filename is printf formatted as -// -// %s-%05d-of-%05d, basename, shard, num_shards. 
-func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) { - if scope.Err() != nil { + if thresholds_list, idx, err = makeOutputList(op, idx, "thresholds_list"); err != nil { + scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) return } - opspec := tf.OpSpec{ - Type: "ShardedFilename", - Input: []tf.Input{ - basename, shard, num_shards, - }, + if left_node_contribs_list, idx, err = makeOutputList(op, idx, "left_node_contribs_list"); err != nil { + scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + if right_node_contribs_list, idx, err = makeOutputList(op, idx, "right_node_contribs_list"); err != nil { + scope.UpdateErr("BoostedTreesCalculateBestGainsPerFeature", err) + return + } + return node_ids_list, gains_list, thresholds_list, left_node_contribs_list, right_node_contribs_list } -// BatchToSpace for N-D tensors of type T. -// -// This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape -// `block_shape + [batch]`, interleaves these blocks back into the grid defined by -// the spatial dimensions `[1, ..., M]`, to obtain a result with the same rank as -// the input. The spatial dimensions of this intermediate result are then -// optionally cropped according to `crops` to produce the output. This is the -// reverse of SpaceToBatch. See below for a precise description. -// -// Arguments: -// input: N-D with shape `input_shape = [batch] + spatial_shape + remaining_shape`, -// where spatial_shape has M dimensions. -// block_shape: 1-D with shape `[M]`, all values must be >= 1. -// crops: 2-D with shape `[M, 2]`, all values must be >= 0. -// `crops[i] = [crop_start, crop_end]` specifies the amount to crop from input -// dimension `i + 1`, which corresponds to spatial dimension `i`. It is -// required that -// `crop_start[i] + crop_end[i] <= block_shape[i] * input_shape[i + 1]`. 
-// -// This operation is equivalent to the following steps: -// -// 1. Reshape `input` to `reshaped` of shape: -// [block_shape[0], ..., block_shape[M-1], -// batch / prod(block_shape), -// input_shape[1], ..., input_shape[N-1]] -// -// 2. Permute dimensions of `reshaped` to produce `permuted` of shape -// [batch / prod(block_shape), -// -// input_shape[1], block_shape[0], -// ..., -// input_shape[M], block_shape[M-1], -// -// input_shape[M+1], ..., input_shape[N-1]] -// -// 3. Reshape `permuted` to produce `reshaped_permuted` of shape -// [batch / prod(block_shape), -// -// input_shape[1] * block_shape[0], -// ..., -// input_shape[M] * block_shape[M-1], -// -// input_shape[M+1], -// ..., -// input_shape[N-1]] -// -// 4. Crop the start and end of dimensions `[1, ..., M]` of -// `reshaped_permuted` according to `crops` to produce the output of shape: -// [batch / prod(block_shape), -// -// input_shape[1] * block_shape[0] - crops[0,0] - crops[0,1], -// ..., -// input_shape[M] * block_shape[M-1] - crops[M-1,0] - crops[M-1,1], -// -// input_shape[M+1], ..., input_shape[N-1]] -// -// Some examples: -// -// (1) For the following input of shape `[4, 1, 1, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 1]` and value: -// -// ``` -// x = [[[[1], [2]], [[3], [4]]]] -// ``` -// -// (2) For the following input of shape `[4, 1, 1, 3]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 3]` and value: -// -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// ``` -// -// (3) For the following input of shape `[4, 2, 2, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [0, 0]]`: -// -// ``` -// x = [[[[1], [3]], [[9], [11]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// 
[[[6], [8]], [[14], [16]]]] -// ``` +// EncodePngAttr is an optional argument to EncodePng. +type EncodePngAttr func(optionalAttr) + +// EncodePngCompression sets the optional compression attribute to value. // -// The output tensor has shape `[1, 4, 4, 1]` and value: +// value: Compression level. +// If not specified, defaults to -1 +func EncodePngCompression(value int64) EncodePngAttr { + return func(m optionalAttr) { + m["compression"] = value + } +} + +// PNG-encode an image. // -// ``` -// x = [[[1], [2], [3], [4]], -// [[5], [6], [7], [8]], -// [[9], [10], [11], [12]], -// [[13], [14], [15], [16]]] -// ``` +// `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]` +// where `channels` is: // -// (4) For the following input of shape `[8, 1, 3, 1]`, `block_shape = [2, 2]`, and -// `crops = [[0, 0], [2, 0]]`: +// * 1: for grayscale. +// * 2: for grayscale + alpha. +// * 3: for RGB. +// * 4: for RGBA. // -// ``` -// x = [[[[0], [1], [3]]], [[[0], [9], [11]]], -// [[[0], [2], [4]]], [[[0], [10], [12]]], -// [[[0], [5], [7]]], [[[0], [13], [15]]], -// [[[0], [6], [8]]], [[[0], [14], [16]]]] -// ``` +// The ZLIB compression level, `compression`, can be -1 for the PNG-encoder +// default or a value from 0 to 9. 9 is the highest compression level, generating +// the smallest output, but is slower. // -// The output tensor has shape `[2, 2, 4, 1]` and value: +// Arguments: +// image: 3-D with shape `[height, width, channels]`. // -// ``` -// x = [[[[1], [2], [3], [4]], -// [[5], [6], [7], [8]]], -// [[[9], [10], [11], [12]], -// [[13], [14], [15], [16]]]] -// ``` -func BatchToSpaceND(scope *Scope, input tf.Output, block_shape tf.Output, crops tf.Output) (output tf.Output) { +// Returns 0-D. PNG-encoded image. 
+func EncodePng(scope *Scope, image tf.Output, optional ...EncodePngAttr) (contents tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BatchToSpaceND", + Type: "EncodePng", Input: []tf.Input{ - input, block_shape, crops, + image, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// UnpackAttr is an optional argument to Unpack. -type UnpackAttr func(optionalAttr) +// QueueDequeueUpToV2Attr is an optional argument to QueueDequeueUpToV2. +type QueueDequeueUpToV2Attr func(optionalAttr) -// UnpackAxis sets the optional axis attribute to value. +// QueueDequeueUpToV2TimeoutMs sets the optional timeout_ms attribute to value. // -// value: Dimension along which to unpack. Negative values wrap around, so the -// valid range is `[-R, R)`. -// If not specified, defaults to 0 -func UnpackAxis(value int64) UnpackAttr { +// value: If the queue has fewer than n elements, this operation +// will block for up to timeout_ms milliseconds. +// Note: This option is not supported yet. +// If not specified, defaults to -1 +func QueueDequeueUpToV2TimeoutMs(value int64) QueueDequeueUpToV2Attr { return func(m optionalAttr) { - m["axis"] = value + m["timeout_ms"] = value } } -// Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors. +// Dequeues `n` tuples of one or more tensors from the given queue. // -// Unpacks `num` tensors from `value` by chipping it along the `axis` dimension. -// For example, given a tensor of shape `(A, B, C, D)`; +// This operation is not supported by all queues. If a queue does not support +// DequeueUpTo, then an Unimplemented error is returned. // -// If `axis == 0` then the i'th tensor in `output` is the slice `value[i, :, :, :]` -// and each tensor in `output` will have shape `(B, C, D)`. (Note that the -// dimension unpacked along is gone, unlike `split`). 
+// If the queue is closed and there are more than 0 but less than `n` +// elements remaining, then instead of returning an OutOfRange error like +// QueueDequeueMany, less than `n` elements are returned immediately. If +// the queue is closed and there are 0 elements left in the queue, then +// an OutOfRange error is returned just like in QueueDequeueMany. +// Otherwise the behavior is identical to QueueDequeueMany: // -// If `axis == 1` then the i'th tensor in `output` is the slice `value[:, i, :, :]` -// and each tensor in `output` will have shape `(A, C, D)`. -// Etc. +// This operation concatenates queue-element component tensors along the +// 0th dimension to make a single component tensor. All of the components +// in the dequeued tuple will have size n in the 0th dimension. // -// This is the opposite of `pack`. +// This operation has `k` outputs, where `k` is the number of components in +// the tuples stored in the given queue, and output `i` is the ith +// component of the dequeued tuple. // // Arguments: -// value: 1-D or higher, with `axis` dimension size equal to `num`. -// +// handle: The handle to a queue. +// n: The number of tuples to dequeue. +// component_types: The type of each component in a tuple. // -// Returns The list of tensors unpacked from `value`. -func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (output []tf.Output) { +// Returns One or more tensors that were dequeued as a tuple. 
+func QueueDequeueUpToV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueUpToV2Attr) (components []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num": num} + attrs := map[string]interface{}{"component_types": component_types} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Unpack", + Type: "QueueDequeueUpToV2", Input: []tf.Input{ - value, + handle, n, }, Attrs: attrs, } @@ -30749,345 +30615,251 @@ func Unpack(scope *Scope, value tf.Output, num int64, optional ...UnpackAttr) (o } var idx int var err error - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - scope.UpdateErr("Unpack", err) + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("QueueDequeueUpToV2", err) return } - return output + return components } -// Increments variable pointed to by 'resource' until it reaches 'limit'. -// -// Arguments: -// resource: Should be from a scalar `Variable` node. -// limit: If incrementing ref would bring it above limit, instead generates an -// 'OutOfRange' error. -// +// Returns the max of x and y (i.e. x > y ? x : y) element-wise. // -// Returns A copy of the input before increment. If nothing else modifies the -// input, the values produced will all be distinct. -func ResourceCountUpTo(scope *Scope, resource tf.Output, limit int64, T tf.DataType) (output tf.Output) { +// *NOTE*: `Maximum` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Maximum(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"limit": limit, "T": T} opspec := tf.OpSpec{ - Type: "ResourceCountUpTo", + Type: "Maximum", Input: []tf.Input{ - resource, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Delete the stack from its resource container. 
+// Returns element-wise remainder of division. This emulates C semantics in that // -// Arguments: -// handle: The handle to a stack. +// the result here is consistent with a truncating divide. E.g. +// `tf.truncatediv(x, y) * y + truncate_mod(x, y) = x`. // -// Returns the created operation. -func StackCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { +// *NOTE*: `Mod` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Mod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "StackCloseV2", + Type: "Mod", Input: []tf.Input{ - handle, + x, y, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Generate a glob pattern matching all sharded file names. -func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { +// Returns element-wise remainder of division. This emulates C semantics in that +// +// the result here is consistent with a truncating divide. E.g. `truncate(x / y) * +// y + truncate_mod(x, y) = x`. +// +// *NOTE*: `TruncateMod` supports broadcasting. More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func TruncateMod(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ShardedFilespec", + Type: "TruncateMod", Input: []tf.Input{ - basename, num_shards, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// TextLineReaderV2Attr is an optional argument to TextLineReaderV2. -type TextLineReaderV2Attr func(optionalAttr) - -// TextLineReaderV2SkipHeaderLines sets the optional skip_header_lines attribute to value. +// Computes offsets of concat inputs within its output. // -// value: Number of lines to skip from the beginning of every file. 
-// If not specified, defaults to 0 -func TextLineReaderV2SkipHeaderLines(value int64) TextLineReaderV2Attr { - return func(m optionalAttr) { - m["skip_header_lines"] = value +// For example: +// +// ``` +// # 'x' is [2, 2, 7] +// # 'y' is [2, 3, 7] +// # 'z' is [2, 5, 7] +// concat_offset(2, [x, y, z]) => [0, 0, 0], [0, 2, 0], [0, 5, 0] +// ``` +// +// This is typically used by gradient computations for a concat operation. +// +// Arguments: +// concat_dim: The dimension along which to concatenate. +// shape: The `N` int32 vectors representing shape of tensors being concatenated. +// +// Returns The `N` int32 vectors representing the starting offset +// of input tensors within the concatenated output. +func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset []tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ConcatOffset", + Input: []tf.Input{ + concat_dim, tf.OutputList(shape), + }, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if offset, idx, err = makeOutputList(op, idx, "offset"); err != nil { + scope.UpdateErr("ConcatOffset", err) + return } + return offset } -// TextLineReaderV2Container sets the optional container attribute to value. +// LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr is an optional argument to LoadTPUEmbeddingRMSPropParametersGradAccumDebug. +type LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr func(optionalAttr) + +// LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableId sets the optional table_id attribute to value. +// If not specified, defaults to -1 // -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. 
-// If not specified, defaults to "" -func TextLineReaderV2Container(value string) TextLineReaderV2Attr { +// REQUIRES: value >= -1 +func LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableId(value int64) LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["container"] = value + m["table_id"] = value } } -// TextLineReaderV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. +// LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableName sets the optional table_name attribute to value. // If not specified, defaults to "" -func TextLineReaderV2SharedName(value string) TextLineReaderV2Attr { +func LoadTPUEmbeddingRMSPropParametersGradAccumDebugTableName(value string) LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr { return func(m optionalAttr) { - m["shared_name"] = value + m["table_name"] = value } } -// A Reader that outputs the lines of a file delimited by '\n'. +// Load RMSProp embedding parameters with debug support. // -// Returns The handle to reference the Reader. -func TextLineReaderV2(scope *Scope, optional ...TextLineReaderV2Attr) (reader_handle tf.Output) { +// An op that loads optimization parameters into HBM for embedding. Must be +// preceded by a ConfigureTPUEmbeddingHost op that sets up the correct +// embedding table configuration. For example, this op is used to install +// parameters that are loaded from a checkpoint before a training loop is +// executed. +// +// Arguments: +// parameters: Value of parameters used in the RMSProp optimization algorithm. +// ms: Value of ms used in the RMSProp optimization algorithm. +// mom: Value of mom used in the RMSProp optimization algorithm. +// gradient_accumulators: Value of gradient_accumulators used in the RMSProp optimization algorithm. +// +// +// +// Returns the created operation. 
+func LoadTPUEmbeddingRMSPropParametersGradAccumDebug(scope *Scope, parameters tf.Output, ms tf.Output, mom tf.Output, gradient_accumulators tf.Output, num_shards int64, shard_id int64, optional ...LoadTPUEmbeddingRMSPropParametersGradAccumDebugAttr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TextLineReaderV2", - + Type: "LoadTPUEmbeddingRMSPropParametersGradAccumDebug", + Input: []tf.Input{ + parameters, ms, mom, gradient_accumulators, + }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LoadAndRemapMatrixAttr is an optional argument to LoadAndRemapMatrix. -type LoadAndRemapMatrixAttr func(optionalAttr) - -// LoadAndRemapMatrixMaxRowsInMemory sets the optional max_rows_in_memory attribute to value. -// -// value: The maximum number of rows to load from the checkpoint at -// once. If less than or equal to 0, the entire matrix will be loaded into -// memory. Setting this arg trades increased disk reads for lower memory usage. -// If not specified, defaults to -1 -func LoadAndRemapMatrixMaxRowsInMemory(value int64) LoadAndRemapMatrixAttr { - return func(m optionalAttr) { - m["max_rows_in_memory"] = value - } + return scope.AddOperation(opspec) } -// Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint +// Compute the lower regularized incomplete Gamma function `P(a, x)`. // -// at `ckpt_path` and potentially reorders its rows and columns using the -// specified remappings. +// The lower regularized incomplete Gamma function is defined as: // -// Most users should use one of the wrapper initializers (such as -// `tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this -// function directly. 
// -// The remappings are 1-D tensors with the following properties: +// \\(P(a, x) = gamma(a, x) / Gamma(a) = 1 - Q(a, x)\\) // -// * `row_remapping` must have exactly `num_rows` entries. Row `i` of the output -// matrix will be initialized from the row corresponding to index -// `row_remapping[i]` in the old `Tensor` from the checkpoint. -// * `col_remapping` must have either 0 entries (indicating that no column -// reordering is needed) or `num_cols` entries. If specified, column `j` of the -// output matrix will be initialized from the column corresponding to index -// `col_remapping[j]` in the old `Tensor` from the checkpoint. -// * A value of -1 in either of the remappings signifies a "missing" entry. In that -// case, values from the `initializing_values` tensor will be used to fill that -// missing row or column. If `row_remapping` has `r` missing entries and -// `col_remapping` has `c` missing entries, then the following condition must be -// true: +// where // -// `(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)` +// \\(gamma(a, x) = \\int_{0}^{x} t^{a-1} exp(-t) dt\\) // -// The remapping tensors can be generated using the GenerateVocabRemapping op. +// is the lower incomplete Gamma function. // -// As an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1], -// initializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing -// the value from row i, column j of the old tensor in the checkpoint, the output -// matrix will look like the following: -// -// [[w(1, 0), w(1, 2), 0.5], -// [w(0, 0), w(0, 2), -0.5], -// [0.25, -0.25, 42]] -// -// Arguments: -// ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from -// which the old matrix `Tensor` will be loaded. -// old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. -// row_remapping: An int `Tensor` of row remappings (generally created by -// `generate_vocab_remapping`). 
Even if no row remapping is needed, this must -// still be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted -// index-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`). -// col_remapping: An int `Tensor` of column remappings (generally created by -// `generate_vocab_remapping`). May be a size-0 `Tensor` if only row remapping -// is to be done (e.g. column ordering is the same). -// initializing_values: A float `Tensor` containing values to fill in for cells -// in the output matrix that are not loaded from the checkpoint. Length must be -// exactly the same as the number of missing / new cells. -// num_rows: Number of rows (length of the 1st dimension) in the output matrix. -// num_cols: Number of columns (length of the 2nd dimension) in the output matrix. -// -// Returns Output matrix containing existing values loaded from the -// checkpoint, and with any missing values filled in from initializing_values. -func LoadAndRemapMatrix(scope *Scope, ckpt_path tf.Output, old_tensor_name tf.Output, row_remapping tf.Output, col_remapping tf.Output, initializing_values tf.Output, num_rows int64, num_cols int64, optional ...LoadAndRemapMatrixAttr) (output_matrix tf.Output) { +// Note, above `Q(a, x)` (`Igammac`) is the upper regularized complete +// Gamma function. +func Igamma(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_rows": num_rows, "num_cols": num_cols} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "LoadAndRemapMatrix", + Type: "Igamma", Input: []tf.Input{ - ckpt_path, old_tensor_name, row_remapping, col_remapping, initializing_values, + a, x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// TFRecordReaderV2Attr is an optional argument to TFRecordReaderV2. -type TFRecordReaderV2Attr func(optionalAttr) - -// TFRecordReaderV2Container sets the optional container attribute to value. 
+// Compute the regularized incomplete beta integral \\(I_x(a, b)\\). // -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func TFRecordReaderV2Container(value string) TFRecordReaderV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// TFRecordReaderV2SharedName sets the optional shared_name attribute to value. +// The regularized incomplete beta integral is defined as: // -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func TFRecordReaderV2SharedName(value string) TFRecordReaderV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// TFRecordReaderV2CompressionType sets the optional compression_type attribute to value. -// If not specified, defaults to "" -func TFRecordReaderV2CompressionType(value string) TFRecordReaderV2Attr { - return func(m optionalAttr) { - m["compression_type"] = value - } -} - -// A Reader that outputs the records from a TensorFlow Records file. // -// Returns The handle to reference the Reader. -func TFRecordReaderV2(scope *Scope, optional ...TFRecordReaderV2Attr) (reader_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "TFRecordReaderV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3. -type QuantizeAndDequantizeV3Attr func(optionalAttr) - -// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value. 
-// If not specified, defaults to true -func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr { - return func(m optionalAttr) { - m["signed_input"] = value - } -} - -// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value. -// If not specified, defaults to true -func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr { - return func(m optionalAttr) { - m["range_given"] = value - } -} - -// Quantizes then dequantizes a tensor. +// \\(I_x(a, b) = \frac{B(x; a, b)}{B(a, b)}\\) // -// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a -// tensor, so its value can change during training. -func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) { +// where +// +// +// \\(B(x; a, b) = \int_0^x t^{a-1} (1 - t)^{b-1} dt\\) +// +// +// is the incomplete beta function and \\(B(a, b)\\) is the *complete* +// beta function. +func Betainc(scope *Scope, a tf.Output, b tf.Output, x tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "QuantizeAndDequantizeV3", + Type: "Betainc", Input: []tf.Input{ - input, input_min, input_max, num_bits, + a, b, x, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// IdentityReaderV2Attr is an optional argument to IdentityReaderV2. -type IdentityReaderV2Attr func(optionalAttr) +// ShapeAttr is an optional argument to Shape. +type ShapeAttr func(optionalAttr) -// IdentityReaderV2Container sets the optional container attribute to value. -// -// value: If non-empty, this reader is placed in the given container. -// Otherwise, a default container is used. 
-// If not specified, defaults to "" -func IdentityReaderV2Container(value string) IdentityReaderV2Attr { +// ShapeOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_INT32 +func ShapeOutType(value tf.DataType) ShapeAttr { return func(m optionalAttr) { - m["container"] = value + m["out_type"] = value } } -// IdentityReaderV2SharedName sets the optional shared_name attribute to value. +// Returns the shape of a tensor. // -// value: If non-empty, this reader is named in the given bucket -// with this shared_name. Otherwise, the node name is used instead. -// If not specified, defaults to "" -func IdentityReaderV2SharedName(value string) IdentityReaderV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A Reader that outputs the queued work as both the key and value. +// This operation returns a 1-D integer tensor representing the shape of `input`. // -// To use, enqueue strings in a Queue. ReaderRead will take the front -// work string and output (work, work). +// For example: // -// Returns The handle to reference the Reader. -func IdentityReaderV2(scope *Scope, optional ...IdentityReaderV2Attr) (reader_handle tf.Output) { +// ``` +// # 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] +// shape(t) ==> [2, 2, 3] +// ``` +func Shape(scope *Scope, input tf.Output, optional ...ShapeAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -31096,242 +30868,174 @@ func IdentityReaderV2(scope *Scope, optional ...IdentityReaderV2Attr) (reader_ha a(attrs) } opspec := tf.OpSpec{ - Type: "IdentityReaderV2", - + Type: "Shape", + Input: []tf.Input{ + input, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceApplyGradientDescentAttr is an optional argument to ResourceApplyGradientDescent. -type ResourceApplyGradientDescentAttr func(optionalAttr) - -// ResourceApplyGradientDescentUseLocking sets the optional use_locking attribute to value. 
-// -// value: If `True`, the subtraction will be protected by a lock; -// otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyGradientDescentUseLocking(value bool) ResourceApplyGradientDescentAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' by subtracting 'alpha' * 'delta' from it. +// Computes fingerprints of the input strings. // // Arguments: -// var_: Should be from a Variable(). -// alpha: Scaling factor. Must be a scalar. -// delta: The change. +// input: vector of strings to compute fingerprints on. // -// Returns the created operation. -func ResourceApplyGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, delta tf.Output, optional ...ResourceApplyGradientDescentAttr) (o *tf.Operation) { +// Returns a (N,2) shaped matrix where N is the number of elements in the input +// vector. Each row contains the low and high parts of the fingerprint. +func SdcaFprint(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResourceApplyGradientDescent", + Type: "SdcaFprint", Input: []tf.Input{ - var_, alpha, delta, + input, }, - Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Returns the next record (key, value pair) produced by a Reader. -// -// Will dequeue from the input queue if necessary (e.g. when the -// Reader needs to start reading from a new file since it has finished -// with the previous file). +// Computes the power of one value to another. // -// Arguments: -// reader_handle: Handle to a Reader. -// queue_handle: Handle to a Queue, with string work items. +// Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for +// corresponding elements in `x` and `y`. For example: // -// Returns A scalar.A scalar. 
-func ReaderReadV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output) (key tf.Output, value tf.Output) { +// ``` +// # tensor 'x' is [[2, 2]], [3, 3]] +// # tensor 'y' is [[8, 16], [2, 3]] +// tf.pow(x, y) ==> [[256, 65536], [9, 27]] +// ``` +func Pow(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ReaderReadV2", + Type: "Pow", Input: []tf.Input{ - reader_handle, queue_handle, + x, y, }, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Returns up to `num_records` (key, value) pairs produced by a Reader. -// -// Will dequeue from the input queue if necessary (e.g. when the -// Reader needs to start reading from a new file since it has finished -// with the previous file). -// It may return less than `num_records` even before the last batch. -// -// Arguments: -// reader_handle: Handle to a `Reader`. -// queue_handle: Handle to a `Queue`, with string work items. -// num_records: number of records to read from `Reader`. -// -// Returns A 1-D tensor.A 1-D tensor. -func ReaderReadUpToV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output, num_records tf.Output) (keys tf.Output, values tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ReaderReadUpToV2", - Input: []tf.Input{ - reader_handle, queue_handle, num_records, - }, +// QuantizedReluXAttr is an optional argument to QuantizedReluX. +type QuantizedReluXAttr func(optionalAttr) + +// QuantizedReluXOutType sets the optional out_type attribute to value. +// If not specified, defaults to DT_QUINT8 +func QuantizedReluXOutType(value tf.DataType) QuantizedReluXAttr { + return func(m optionalAttr) { + m["out_type"] = value } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) } -// Adds v into specified rows of x. -// -// Computes y = x; y[i, :] += v; return y. 
+// Computes Quantized Rectified Linear X: `min(max(features, 0), max_value)` // // Arguments: -// x: A `Tensor` of type T. -// i: A vector. Indices into the left-most dimension of `x`. -// v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size. // -// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`. -func InplaceAdd(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) { +// +// min_features: The float value that the lowest quantized value represents. +// max_features: The float value that the highest quantized value represents. +// +// Returns Has the same output shape as "features".The float value that the lowest quantized value represents.The float value that the highest quantized value represents. +func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_features tf.Output, max_features tf.Output, optional ...QuantizedReluXAttr) (activations tf.Output, min_activations tf.Output, max_activations tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "InplaceAdd", + Type: "QuantizedReluX", Input: []tf.Input{ - x, i, v, + features, max_value, min_features, max_features, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Restore a Reader to its initial clean state. -// -// Arguments: -// reader_handle: Handle to a Reader. +// Returns the truth value of (x < y) element-wise. // -// Returns the created operation. -func ReaderResetV2(scope *Scope, reader_handle tf.Output) (o *tf.Operation) { +// *NOTE*: `Less` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func Less(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ReaderResetV2", + Type: "Less", Input: []tf.Input{ - reader_handle, + x, y, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// BatchAttr is an optional argument to Batch. -type BatchAttr func(optionalAttr) +// RandomPoissonAttr is an optional argument to RandomPoisson. +type RandomPoissonAttr func(optionalAttr) -// BatchMaxEnqueuedBatches sets the optional max_enqueued_batches attribute to value. -// If not specified, defaults to 10 -func BatchMaxEnqueuedBatches(value int64) BatchAttr { +// RandomPoissonSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func RandomPoissonSeed(value int64) RandomPoissonAttr { return func(m optionalAttr) { - m["max_enqueued_batches"] = value + m["seed"] = value } } -// BatchAllowedBatchSizes sets the optional allowed_batch_sizes attribute to value. -// If not specified, defaults to <> -func BatchAllowedBatchSizes(value []int64) BatchAttr { +// RandomPoissonSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func RandomPoissonSeed2(value int64) RandomPoissonAttr { return func(m optionalAttr) { - m["allowed_batch_sizes"] = value + m["seed2"] = value } } -// BatchContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func BatchContainer(value string) BatchAttr { - return func(m optionalAttr) { - m["container"] = value +// Use RandomPoissonV2 instead. +// +// DEPRECATED at GraphDef version 25: Replaced by RandomPoissonV2 +func RandomPoisson(scope *Scope, shape tf.Output, rate tf.Output, optional ...RandomPoissonAttr) (output tf.Output) { + if scope.Err() != nil { + return } -} - -// BatchSharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func BatchSharedName(value string) BatchAttr { - return func(m optionalAttr) { - m["shared_name"] = value + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) } -} - -// BatchBatchingQueue sets the optional batching_queue attribute to value. -// If not specified, defaults to "" -func BatchBatchingQueue(value string) BatchAttr { - return func(m optionalAttr) { - m["batching_queue"] = value + opspec := tf.OpSpec{ + Type: "RandomPoisson", + Input: []tf.Input{ + shape, rate, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Batches all input tensors nondeterministically. -// -// When many instances of this Op are being run concurrently with the same -// container/shared_name in the same device, some will output zero-shaped Tensors -// and others will output Tensors of size up to max_batch_size. -// -// All Tensors in in_tensors are batched together (so, for example, labels and -// features should be batched with a single instance of this operation. -// -// Each invocation of batch emits an `id` scalar which will be used to identify -// this particular invocation when doing unbatch or its gradient. -// -// Each op which emits a non-empty batch will also emit a non-empty batch_index -// Tensor, which, is a [K, 3] matrix where each row contains the invocation's id, -// start, and length of elements of each set of Tensors present in batched_tensors. -// -// Batched tensors are concatenated along the first dimension, and all tensors in -// in_tensors must have the first dimension of the same size. +// Gets the next output from the given iterator. // -// in_tensors: The tensors to be batched. -// num_batch_threads: Number of scheduling threads for processing batches of work. -// Determines the number of batches processed in parallel. -// max_batch_size: Batch sizes will never be bigger than this. 
-// batch_timeout_micros: Maximum number of microseconds to wait before outputting -// an incomplete batch. -// allowed_batch_sizes: Optional list of allowed batch sizes. If left empty, does -// nothing. Otherwise, supplies a list of batch sizes, causing the op to pad -// batches up to one of those sizes. The entries must increase monotonically, and -// the final entry must equal max_batch_size. -// grad_timeout_micros: The timeout to use for the gradient. See Unbatch. -// batched_tensors: Either empty tensors or a batch of concatenated Tensors. -// batch_index: If out_tensors is non-empty, has information to invert it. -// container: Controls the scope of sharing of this batch. -// id: always contains a scalar with a unique ID for this invocation of Batch. -// shared_name: Concurrently running instances of batch in the same device with the -// same container and shared_name will batch their elements together. If left -// empty, the op name will be used as the shared name. -// T: the types of tensors to be batched. -func Batch(scope *Scope, in_tensors []tf.Output, num_batch_threads int64, max_batch_size int64, batch_timeout_micros int64, grad_timeout_micros int64, optional ...BatchAttr) (batched_tensors []tf.Output, batch_index tf.Output, id tf.Output) { +// This operation is a synchronous version IteratorGetNext. It should only be used +// in situations where the iterator does not block the calling thread, or where +// the calling thread is not a member of the thread pool used to execute parallel +// operations (e.g. in eager mode). 
+func IteratorGetNextSync(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_batch_threads": num_batch_threads, "max_batch_size": max_batch_size, "batch_timeout_micros": batch_timeout_micros, "grad_timeout_micros": grad_timeout_micros} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "Batch", + Type: "IteratorGetNextSync", Input: []tf.Input{ - tf.OutputList(in_tensors), + iterator, }, Attrs: attrs, } @@ -31341,68 +31045,44 @@ func Batch(scope *Scope, in_tensors []tf.Output, num_batch_threads int64, max_ba } var idx int var err error - if batched_tensors, idx, err = makeOutputList(op, idx, "batched_tensors"); err != nil { - scope.UpdateErr("Batch", err) + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("IteratorGetNextSync", err) return } - batch_index = op.Output(idx) - id = op.Output(idx) - return batched_tensors, batch_index, id + return components } -// Adjust the hue of one or more images. -// -// `images` is a tensor of at least 3 dimensions. The last dimension is -// interpretted as channels, and must be three. -// -// The input image is considered in the RGB colorspace. Conceptually, the RGB -// colors are first mapped into HSV. A delta is then applied all the hue values, -// and then remapped back to RGB colorspace. -// -// Arguments: -// images: Images to adjust. At least 3-D. -// delta: A float delta to add to the hue. +// Returns the truth value of (x >= y) element-wise. // -// Returns The hue-adjusted image or images. -func AdjustHue(scope *Scope, images tf.Output, delta tf.Output) (output tf.Output) { +// *NOTE*: `GreaterEqual` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func GreaterEqual(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "AdjustHue", + Type: "GreaterEqual", Input: []tf.Input{ - images, delta, + x, y, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResizeBicubicGradAttr is an optional argument to ResizeBicubicGrad. -type ResizeBicubicGradAttr func(optionalAttr) +// ApproximateEqualAttr is an optional argument to ApproximateEqual. +type ApproximateEqualAttr func(optionalAttr) -// ResizeBicubicGradAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and grad tensors are -// aligned. Defaults to false. -// If not specified, defaults to false -func ResizeBicubicGradAlignCorners(value bool) ResizeBicubicGradAttr { +// ApproximateEqualTolerance sets the optional tolerance attribute to value. +// If not specified, defaults to 1e-05 +func ApproximateEqualTolerance(value float32) ApproximateEqualAttr { return func(m optionalAttr) { - m["align_corners"] = value + m["tolerance"] = value } } -// Computes the gradient of bicubic interpolation. -// -// Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, -// The image tensor that was resized. -// -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. -// Gradients with respect to the input image. Input image must have been -// float or double. -func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBicubicGradAttr) (output tf.Output) { +// Returns the truth value of abs(x-y) < tolerance element-wise. 
+func ApproximateEqual(scope *Scope, x tf.Output, y tf.Output, optional ...ApproximateEqualAttr) (z tf.Output) { if scope.Err() != nil { return } @@ -31411,9 +31091,9 @@ func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "ResizeBicubicGrad", + Type: "ApproximateEqual", Input: []tf.Input{ - grads, original_image, + x, y, }, Attrs: attrs, } @@ -31421,113 +31101,57 @@ func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, return op.Output(0) } -// ResizeNearestNeighborAttr is an optional argument to ResizeNearestNeighbor. -type ResizeNearestNeighborAttr func(optionalAttr) - -// ResizeNearestNeighborAlignCorners sets the optional align_corners attribute to value. -// -// value: If true, the centers of the 4 corner pixels of the input and output tensors are -// aligned, preserving the values at the corner pixels. Defaults to false. -// If not specified, defaults to false -func ResizeNearestNeighborAlignCorners(value bool) ResizeNearestNeighborAttr { - return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// Resize `images` to `size` using nearest neighbor interpolation. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The -// new size for the images. +// Returns the truth value of x OR y element-wise. // -// Returns 4-D with shape -// `[batch, new_height, new_width, channels]`. -func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeNearestNeighborAttr) (resized_images tf.Output) { +// *NOTE*: `LogicalOr` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func LogicalOr(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "ResizeNearestNeighbor", + Type: "LogicalOr", Input: []tf.Input{ - images, size, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResizeNearestNeighborGradAttr is an optional argument to ResizeNearestNeighborGrad. -type ResizeNearestNeighborGradAttr func(optionalAttr) +// MatMulAttr is an optional argument to MatMul. +type MatMulAttr func(optionalAttr) -// ResizeNearestNeighborGradAlignCorners sets the optional align_corners attribute to value. +// MatMulTransposeA sets the optional transpose_a attribute to value. // -// value: If true, the centers of the 4 corner pixels of the input and grad tensors are -// aligned. Defaults to false. +// value: If true, "a" is transposed before multiplication. // If not specified, defaults to false -func ResizeNearestNeighborGradAlignCorners(value bool) ResizeNearestNeighborGradAttr { +func MatMulTransposeA(value bool) MatMulAttr { return func(m optionalAttr) { - m["align_corners"] = value - } -} - -// Computes the gradient of nearest neighbor interpolation. -// -// Arguments: -// grads: 4-D with shape `[batch, height, width, channels]`. -// size: = A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The -// original input size. -// -// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients -// with respect to the input image. 
-func ResizeNearestNeighborGrad(scope *Scope, grads tf.Output, size tf.Output, optional ...ResizeNearestNeighborGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResizeNearestNeighborGrad", - Input: []tf.Input{ - grads, size, - }, - Attrs: attrs, + m["transpose_a"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// ExtractJpegShapeAttr is an optional argument to ExtractJpegShape. -type ExtractJpegShapeAttr func(optionalAttr) - -// ExtractJpegShapeOutputType sets the optional output_type attribute to value. +// MatMulTransposeB sets the optional transpose_b attribute to value. // -// value: (Optional) The output type of the operation (int32 or int64). -// Defaults to int32. -// If not specified, defaults to DT_INT32 -func ExtractJpegShapeOutputType(value tf.DataType) ExtractJpegShapeAttr { +// value: If true, "b" is transposed before multiplication. +// If not specified, defaults to false +func MatMulTransposeB(value bool) MatMulAttr { return func(m optionalAttr) { - m["output_type"] = value + m["transpose_b"] = value } } -// Extract the shape information of a JPEG-encoded image. -// -// This op only parses the image header, so it is much faster than DecodeJpeg. +// Multiply the matrix "a" by the matrix "b". // -// Arguments: -// contents: 0-D. The JPEG-encoded image. +// The inputs must be two-dimensional matrices and the inner dimension of +// "a" (after being transposed if transpose_a is true) must match the +// outer dimension of "b" (after being transposed if transposed_b is +// true). // -// Returns 1-D. The image shape with format [height, width, channels]. -func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegShapeAttr) (image_shape tf.Output) { +// *Note*: The default kernel implementation for MatMul on GPUs uses +// cublas. 
+func MatMul(scope *Scope, a tf.Output, b tf.Output, optional ...MatMulAttr) (product tf.Output) { if scope.Err() != nil { return } @@ -31536,9 +31160,9 @@ func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegS a(attrs) } opspec := tf.OpSpec{ - Type: "ExtractJpegShape", + Type: "MatMul", Input: []tf.Input{ - contents, + a, b, }, Attrs: attrs, } @@ -31546,132 +31170,97 @@ func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegS return op.Output(0) } -// PaddingFIFOQueueV2Attr is an optional argument to PaddingFIFOQueueV2. -type PaddingFIFOQueueV2Attr func(optionalAttr) - -// PaddingFIFOQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. -// Shapes of fixed rank but variable size are allowed by setting -// any shape dimension to -1. In this case, the inputs' shape may vary along -// the given dimension, and DequeueMany will pad the given dimension with -// zeros up to the maximum shape of all elements in the given batch. -// If the length of this attr is 0, different queue elements may have -// different ranks and shapes, but only one element may be dequeued at a time. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func PaddingFIFOQueueV2Shapes(value []tf.Shape) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} +// InitializeTableFromTextFileV2Attr is an optional argument to InitializeTableFromTextFileV2. +type InitializeTableFromTextFileV2Attr func(optionalAttr) -// PaddingFIFOQueueV2Capacity sets the optional capacity attribute to value. +// InitializeTableFromTextFileV2VocabSize sets the optional vocab_size attribute to value. // -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. +// value: Number of elements of the file, use -1 if unknown. 
// If not specified, defaults to -1 -func PaddingFIFOQueueV2Capacity(value int64) PaddingFIFOQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// PaddingFIFOQueueV2Container sets the optional container attribute to value. // -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func PaddingFIFOQueueV2Container(value string) PaddingFIFOQueueV2Attr { +// REQUIRES: value >= -1 +func InitializeTableFromTextFileV2VocabSize(value int64) InitializeTableFromTextFileV2Attr { return func(m optionalAttr) { - m["container"] = value + m["vocab_size"] = value } } -// PaddingFIFOQueueV2SharedName sets the optional shared_name attribute to value. +// InitializeTableFromTextFileV2Delimiter sets the optional delimiter attribute to value. // -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func PaddingFIFOQueueV2SharedName(value string) PaddingFIFOQueueV2Attr { +// value: Delimiter to separate fields in a line. +// If not specified, defaults to "\t" +func InitializeTableFromTextFileV2Delimiter(value string) InitializeTableFromTextFileV2Attr { return func(m optionalAttr) { - m["shared_name"] = value + m["delimiter"] = value } } -// A queue that produces elements in first-in first-out order. +// Initializes a table from a text file. // -// Variable-size shapes are allowed by setting the corresponding shape dimensions -// to 0 in the shape attr. In this case DequeueMany will pad up to the maximum -// size of any given element in the minibatch. See below for details. +// It inserts one key-value pair into the table for each line of the file. +// The key and value is extracted from the whole line content, elements from the +// split line based on `delimiter` or the line number (starting from zero). 
+// Where to extract the key and value from a line is specified by `key_index` and +// `value_index`. +// +// - A value of -1 means use the line number(starting from zero), expects `int64`. +// - A value of -2 means use the whole line content, expects `string`. +// - A value >= 0 means use the index (starting at zero) of the split line based +// on `delimiter`. // // Arguments: -// component_types: The type of each component in a value. +// table_handle: Handle to a table which will be initialized. +// filename: Filename of a vocabulary text file. +// key_index: Column index in a line to get the table `key` values from. +// value_index: Column index that represents information of a line to get the table +// `value` values from. // -// Returns The handle to the queue. -func PaddingFIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...PaddingFIFOQueueV2Attr) (handle tf.Output) { +// Returns the created operation. +func InitializeTableFromTextFileV2(scope *Scope, table_handle tf.Output, filename tf.Output, key_index int64, value_index int64, optional ...InitializeTableFromTextFileV2Attr) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"component_types": component_types} + attrs := map[string]interface{}{"key_index": key_index, "value_index": value_index} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "PaddingFIFOQueueV2", - + Type: "InitializeTableFromTextFileV2", + Input: []tf.Input{ + table_handle, filename, + }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// DecodePngAttr is an optional argument to DecodePng. -type DecodePngAttr func(optionalAttr) +// MeanAttr is an optional argument to Mean. +type MeanAttr func(optionalAttr) -// DecodePngChannels sets the optional channels attribute to value. +// MeanKeepDims sets the optional keep_dims attribute to value. // -// value: Number of color channels for the decoded image. 
-// If not specified, defaults to 0 -func DecodePngChannels(value int64) DecodePngAttr { - return func(m optionalAttr) { - m["channels"] = value - } -} - -// DecodePngDtype sets the optional dtype attribute to value. -// If not specified, defaults to DT_UINT8 -func DecodePngDtype(value tf.DataType) DecodePngAttr { +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func MeanKeepDims(value bool) MeanAttr { return func(m optionalAttr) { - m["dtype"] = value + m["keep_dims"] = value } } -// Decode a PNG-encoded image to a uint8 or uint16 tensor. -// -// The attr `channels` indicates the desired number of color channels for the -// decoded image. -// -// Accepted values are: -// -// * 0: Use the number of channels in the PNG-encoded image. -// * 1: output a grayscale image. -// * 3: output an RGB image. -// * 4: output an RGBA image. -// -// If needed, the PNG-encoded image is transformed to match the requested number -// of color channels. +// Computes the mean of elements across dimensions of a tensor. // -// This op also supports decoding JPEGs and non-animated GIFs since the interface -// is the same, though it is cleaner to use `tf.image.decode_image`. +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. // // Arguments: -// contents: 0-D. The PNG-encoded image. +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. // -// Returns 3-D with shape `[height, width, channels]`. -func DecodePng(scope *Scope, contents tf.Output, optional ...DecodePngAttr) (image tf.Output) { +// Returns The reduced tensor. 
+func Mean(scope *Scope, input tf.Output, axis tf.Output, optional ...MeanAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -31680,9 +31269,9 @@ func DecodePng(scope *Scope, contents tf.Output, optional ...DecodePngAttr) (ima a(attrs) } opspec := tf.OpSpec{ - Type: "DecodePng", + Type: "Mean", Input: []tf.Input{ - contents, + input, axis, }, Attrs: attrs, } @@ -31690,128 +31279,122 @@ func DecodePng(scope *Scope, contents tf.Output, optional ...DecodePngAttr) (ima return op.Output(0) } -// Decode the first frame of a GIF-encoded image to a uint8 tensor. -// -// GIF with frame or transparency compression are not supported -// convert animated GIF from compressed to uncompressed by: +// ProdAttr is an optional argument to Prod. +type ProdAttr func(optionalAttr) + +// ProdKeepDims sets the optional keep_dims attribute to value. // -// convert $src.gif -coalesce $dst.gif +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func ProdKeepDims(value bool) ProdAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the product of elements across dimensions of a tensor. // -// This op also supports decoding JPEGs and PNGs, though it is cleaner to use -// `tf.image.decode_image`. +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. // // Arguments: -// contents: 0-D. The GIF-encoded image. +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. // -// Returns 4-D with shape `[num_frames, height, width, 3]`. RGB order -func DecodeGif(scope *Scope, contents tf.Output) (image tf.Output) { +// Returns The reduced tensor. 
+func Prod(scope *Scope, input tf.Output, axis tf.Output, optional ...ProdAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "DecodeGif", + Type: "Prod", Input: []tf.Input{ - contents, + input, axis, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// LearnedUnigramCandidateSamplerAttr is an optional argument to LearnedUnigramCandidateSampler. -type LearnedUnigramCandidateSamplerAttr func(optionalAttr) - -// LearnedUnigramCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func LearnedUnigramCandidateSamplerSeed(value int64) LearnedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} +// ResizeBilinearAttr is an optional argument to ResizeBilinear. +type ResizeBilinearAttr func(optionalAttr) -// LearnedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// ResizeBilinearAlignCorners sets the optional align_corners attribute to value. // -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func LearnedUnigramCandidateSamplerSeed2(value int64) LearnedUnigramCandidateSamplerAttr { +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func ResizeBilinearAlignCorners(value bool) ResizeBilinearAttr { return func(m optionalAttr) { - m["seed2"] = value + m["align_corners"] = value } } -// Generates labels for candidate sampling with a learned unigram distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. 
-// -// For each batch, this op picks a single set of sampled candidate labels. +// Resize `images` to `size` using bilinear interpolation. // -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. +// Input images can be of different types but output images are always float. // // Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. // -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. 
-func LearnedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LearnedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func ResizeBilinear(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeBilinearAttr) (resized_images tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "LearnedUnigramCandidateSampler", + Type: "ResizeBilinear", Input: []tf.Input{ - true_classes, + images, size, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// SerializeSparseAttr is an optional argument to SerializeSparse. -type SerializeSparseAttr func(optionalAttr) +// MaxAttr is an optional argument to Max. +type MaxAttr func(optionalAttr) -// SerializeSparseOutType sets the optional out_type attribute to value. +// MaxKeepDims sets the optional keep_dims attribute to value. // -// value: The `dtype` to use for serialization; the supported types are `string` -// (default) and `variant`. -// If not specified, defaults to DT_STRING -func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr { +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func MaxKeepDims(value bool) MaxAttr { return func(m optionalAttr) { - m["out_type"] = value + m["keep_dims"] = value } } -// Serialize a `SparseTensor` into a `[3]` `Tensor` object. +// Computes the maximum of elements across dimensions of a tensor. +// +// Reduces `input` along the dimensions given in `axis`. 
Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. // // Arguments: -// sparse_indices: 2-D. The `indices` of the `SparseTensor`. -// sparse_values: 1-D. The `values` of the `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the `SparseTensor`. -func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) { +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. +// +// Returns The reduced tensor. +func Max(scope *Scope, input tf.Output, axis tf.Output, optional ...MaxAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -31820,9 +31403,9 @@ func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Ou a(attrs) } opspec := tf.OpSpec{ - Type: "SerializeSparse", + Type: "Max", Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, + input, axis, }, Attrs: attrs, } @@ -31830,305 +31413,3422 @@ func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Ou return op.Output(0) } -// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2. -type RandomShuffleQueueV2Attr func(optionalAttr) - -// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. If the length of -// this attr is 0, the shapes of queue elements are not constrained, and -// only one element may be dequeued at a time. 
-// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} - -// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value. -// -// value: Dequeue will block unless there would be this -// many elements after the dequeue or the queue is closed. This -// ensures a minimum level of mixing of elements. -// If not specified, defaults to 0 -func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["min_after_dequeue"] = value +// Creates a dataset that contains the unique elements of `input_dataset`. +func ExperimentalUniqueDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return } -} - -// RandomShuffleQueueV2Seed sets the optional seed attribute to value. -// -// value: If either seed or seed2 is set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, a random seed is used. 
-// If not specified, defaults to 0 -func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["seed"] = value + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "ExperimentalUniqueDataset", + Input: []tf.Input{ + input_dataset, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} +// ArgMinAttr is an optional argument to ArgMin. +type ArgMinAttr func(optionalAttr) -// RandomShuffleQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr { +// ArgMinOutputType sets the optional output_type attribute to value. +// If not specified, defaults to DT_INT64 +func ArgMinOutputType(value tf.DataType) ArgMinAttr { return func(m optionalAttr) { - m["container"] = value + m["output_type"] = value } } -// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value. +// Returns the index with the smallest value across dimensions of a tensor. // -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that randomizes the order of elements. +// Note that in case of ties the identity of the return value is not guaranteed. 
// // Arguments: -// component_types: The type of each component in a value. // -// Returns The handle to the queue. -func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) { +// dimension: int32 or int64, must be in the range `[-rank(input), rank(input))`. +// Describes which dimension of the input Tensor to reduce across. For vectors, +// use dimension = 0. +func ArgMin(scope *Scope, input tf.Output, dimension tf.Output, optional ...ArgMinAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"component_types": component_types} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "RandomShuffleQueueV2", - + Type: "ArgMin", + Input: []tf.Input{ + input, dimension, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Draw bounding boxes on a batch of images. -// -// Outputs a copy of `images` but draws on top of the pixels zero or more bounding -// boxes specified by the locations in `boxes`. The coordinates of the each -// bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. +// Convert the quantized 'input' tensor into a lower-precision 'output', using the // -// For example, if an image is 100 x 200 pixels (height x width) and the bounding -// box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of -// the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). +// output range specified with 'requested_output_min' and 'requested_output_max'. // -// Parts of the bounding box may fall outside the image. +// [input_min, input_max] are scalar floats that specify the range for the float +// interpretation of the 'input' data. 
For example, if input_min is -1.0f and +// input_max is 1.0f, and we are dealing with quint16 quantized data, then a 0 +// value in the 16-bit data should be interpreted as -1.0f, and a 65535 means 1.0f. // // Arguments: -// images: 4-D with shape `[batch, height, width, depth]`. A batch of images. -// boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding -// boxes. // -// Returns 4-D with the same shape as `images`. The batch of input images with -// bounding boxes drawn on the images. -func DrawBoundingBoxes(scope *Scope, images tf.Output, boxes tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DrawBoundingBoxes", +// input_min: The float value that the minimum quantized input value represents. +// input_max: The float value that the maximum quantized input value represents. +// requested_output_min: The float value that the minimum quantized output value represents. +// requested_output_max: The float value that the maximum quantized output value represents. +// out_type: The type of the output. Should be a lower bit depth than Tinput. +// +// Returns The requested_output_min value is copied into this output.The requested_output_max value is copied into this output. +func Requantize(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, requested_output_min tf.Output, requested_output_max tf.Output, out_type tf.DataType) (output tf.Output, output_min tf.Output, output_max tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"out_type": out_type} + opspec := tf.OpSpec{ + Type: "Requantize", + Input: []tf.Input{ + input, input_min, input_max, requested_output_min, requested_output_max, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Creates a dataset that emits the lines of one or more text files. 
+// +// Arguments: +// filenames: A scalar or a vector containing the name(s) of the file(s) to be +// read. +// compression_type: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// buffer_size: A scalar containing the number of bytes to buffer. +func TextLineDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TextLineDataset", + Input: []tf.Input{ + filenames, compression_type, buffer_size, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the sum along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. +// +// Computes a tensor such that +// \\(output_i = \sum_j data_j\\) where sum is over `j` such +// that `segment_ids[j] == i`. +// +// If the sum is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// For example: +// +// ``` +// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) +// tf.segment_sum(c, tf.constant([0, 0, 1])) +// # ==> [[5, 5, 5, 5], +// # [5, 6, 7, 8]] +// ``` +// +// +// Arguments: +// +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentSum", + Input: []tf.Input{ + data, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the mean along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. +// +// Computes a tensor such that +// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is +// over `j` such that `segment_ids[j] == i` and `N` is the total number of +// values summed. +// +// If the mean is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// For example: +// +// ``` +// c = tf.constant([[1.0,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) +// tf.segment_mean(c, tf.constant([0, 0, 1])) +// # ==> [[2.5, 2.5, 2.5, 2.5], +// # [5, 6, 7, 8]] +// ``` +// +// +// Arguments: +// +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentMean", + Input: []tf.Input{ + data, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the minimum along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. +// +// Computes a tensor such that +// \\(output_i = \min_j(data_j)\\) where `min` is over `j` such +// that `segment_ids[j] == i`. +// +// If the min is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// For example: +// +// ``` +// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) +// tf.segment_min(c, tf.constant([0, 0, 1])) +// # ==> [[1, 2, 2, 1], +// # [5, 6, 7, 8]] +// ``` +// +// Arguments: +// +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMin(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentMin", + Input: []tf.Input{ + data, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the sum along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. +// +// Computes a tensor such that +// \\(output[i] = \sum_{j...} data[j...]\\) where the sum is over tuples `j...` such +// that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` +// need not be sorted and need not cover all values in the full +// range of valid values. +// +// If the sum is empty for a given segment ID `i`, `output[i] = 0`. +// If the given segment ID `i` is negative, the value is dropped and will not be +// added to the sum of the segment. +// +// `num_segments` should equal the number of distinct segment IDs. +// +//
+// +//
+// +// ``` python +// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) +// tf.unsorted_segment_sum(c, tf.constant([0, 1, 0]), num_segments=2) +// # ==> [[ 5, 5, 5, 5], +// # [5, 6, 7, 8]] +// ``` +// +// +// Arguments: +// +// segment_ids: A tensor whose shape is a prefix of `data.shape`. +// +// +// Returns Has same shape as data, except for the first `segment_ids.rank` +// dimensions, which are replaced with a single dimension which has size +// `num_segments`. +func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "UnsortedSegmentSum", + Input: []tf.Input{ + data, segment_ids, num_segments, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the product along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. +// +// This operator is similar to the unsorted segment sum operator found +// [(here)](../../../api_docs/python/math_ops.md#UnsortedSegmentSum). +// Instead of computing the sum over segments, it computes the product of all +// entries belonging to a segment such that: +// +// \\(output_i = \prod_{j...} data[j...]\\) where the product is over tuples +// `j...` such that `segment_ids[j...] == i`. +// +// For example: +// +// ``` python +// c = tf.constant([[1,2,3,4], [5,6,7,8], [4,3,2,1]]) +// tf.unsorted_segment_prod(c, tf.constant([0, 1, 0]), num_segments=2) +// # ==> [[ 4, 6, 6, 4], +// # [5, 6, 7, 8]] +// ``` +// +// If there is no entry for a given segment ID `i`, it outputs 1. +// +// If the given segment ID `i` is negative, then the corresponding value is +// dropped, and will not be included in the result. +// +// Arguments: +// +// segment_ids: A tensor whose shape is a prefix of `data.shape`. 
+// +// +// Returns Has same shape as data, except for the first `segment_ids.rank` +// dimensions, which are replaced with a single dimension which has size +// `num_segments`. +func UnsortedSegmentProd(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "UnsortedSegmentProd", + Input: []tf.Input{ + data, segment_ids, num_segments, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes hyperbolic cosine of x element-wise. +func Cosh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Cosh", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the mean along sparse segments of a tensor. +// +// Like `SparseSegmentMean`, but allows missing ids in `segment_ids`. If an id is +// missing, the `output` tensor at that position will be zeroed. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. +// +// Arguments: +// +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// num_segments: Should equal the number of distinct segment IDs. +// +// Returns Has same shape as data, except for dimension 0 which has size +// `num_segments`. +func SparseSegmentMeanWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSegmentMeanWithNumSegments", + Input: []tf.Input{ + data, indices, segment_ids, num_segments, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// CudnnRNNParamsSizeAttr is an optional argument to CudnnRNNParamsSize.
+type CudnnRNNParamsSizeAttr func(optionalAttr) + +// CudnnRNNParamsSizeRnnMode sets the optional rnn_mode attribute to value. +// If not specified, defaults to "lstm" +func CudnnRNNParamsSizeRnnMode(value string) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["rnn_mode"] = value + } +} + +// CudnnRNNParamsSizeInputMode sets the optional input_mode attribute to value. +// If not specified, defaults to "linear_input" +func CudnnRNNParamsSizeInputMode(value string) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["input_mode"] = value + } +} + +// CudnnRNNParamsSizeDirection sets the optional direction attribute to value. +// If not specified, defaults to "unidirectional" +func CudnnRNNParamsSizeDirection(value string) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["direction"] = value + } +} + +// CudnnRNNParamsSizeDropout sets the optional dropout attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsSizeDropout(value float32) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["dropout"] = value + } +} + +// CudnnRNNParamsSizeSeed sets the optional seed attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsSizeSeed(value int64) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// CudnnRNNParamsSizeSeed2 sets the optional seed2 attribute to value. +// If not specified, defaults to 0 +func CudnnRNNParamsSizeSeed2(value int64) CudnnRNNParamsSizeAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Computes size of weights that can be used by a Cudnn RNN model. +// +// Return the params size that can be used by the Cudnn RNN model. Subsequent +// weight allocation and initialization should use this size. +// +// num_layers: Specifies the number of layers in the RNN model. +// num_units: Specifies the size of the hidden state. +// input_size: Specifies the size of the input state. 
+// rnn_mode: Indicates the type of the RNN model. +// input_mode: Indicate whether there is a linear projection between the input and +// The actual computation before the first layer. 'skip_input' is only allowed +// when input_size == num_units; 'auto_select' implies 'skip_input' when +// input_size == num_units; otherwise, it implies 'linear_input'. +// direction: Indicates whether a bidirectional model will be used. +// dir = (direction == bidirectional) ? 2 : 1 +// dropout: dropout probability. When set to 0., dropout is disabled. +// seed: the 1st part of a seed to initialize dropout. +// seed2: the 2nd part of a seed to initialize dropout. +// params_size: The size of the params buffer that should be allocated and +// initialized for this RNN model. Note that this params buffer may not be +// compatible across GPUs. Please use CudnnRNNParamsWeights and +// CudnnRNNParamsBiases to save and restore them in a way that is compatible +// across different runs. +func CudnnRNNParamsSize(scope *Scope, num_layers tf.Output, num_units tf.Output, input_size tf.Output, T tf.DataType, S tf.DataType, optional ...CudnnRNNParamsSizeAttr) (params_size tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"T": T, "S": S} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "CudnnRNNParamsSize", + Input: []tf.Input{ + num_layers, num_units, input_size, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes gradients for SparseSegmentMean. +// +// Returns tensor "output" with same shape as grad, except for dimension 0 whose +// value is output_dim0. +// +// Arguments: +// grad: gradient propagated to the SparseSegmentMean op. +// indices: indices passed to the corresponding SparseSegmentMean op. +// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. +// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. 
+func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSegmentMeanGrad", + Input: []tf.Input{ + grad, indices, segment_ids, output_dim0, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// +// N is the size of the segment being reduced. +// +// See `tf.sparse.segment_sum` for usage examples. +// +// +// Arguments: +// +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SparseSegmentSqrtN(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSegmentSqrtN", + Input: []tf.Input{ + data, indices, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Compute the upper regularized incomplete Gamma function `Q(a, x)`. +// +// The upper regularized incomplete Gamma function is defined as: +// +// \\(Q(a, x) = Gamma(a, x) / Gamma(a) = 1 - P(a, x)\\) +// +// where +// +// \\(Gamma(a, x) = \int_{x}^{\infty} t^{a-1} exp(-t) dt\\) +// +// is the upper incomplete Gamma function. +// +// Note, above `P(a, x)` (`Igamma`) is the lower regularized incomplete +// Gamma function. +func Igammac(scope *Scope, a tf.Output, x tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Igammac", + Input: []tf.Input{ + a, x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the sum along sparse segments of a tensor divided by the sqrt of N. +// +// N is the size of the segment being reduced.
+// +// Like `SparseSegmentSqrtN`, but allows missing ids in `segment_ids`. If an id is +// missing, the `output` tensor at that position will be zeroed. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. +// +// Arguments: +// +// indices: A 1-D tensor. Has same rank as `segment_ids`. +// segment_ids: A 1-D tensor. Values should be sorted and can be repeated. +// num_segments: Should equal the number of distinct segment IDs. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SparseSegmentSqrtNWithNumSegments(scope *Scope, data tf.Output, indices tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSegmentSqrtNWithNumSegments", + Input: []tf.Input{ + data, indices, segment_ids, num_segments, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes gradients for SparseSegmentSqrtN. +// +// Returns tensor "output" with same shape as grad, except for dimension 0 whose +// value is output_dim0. +// +// Arguments: +// grad: gradient propagated to the SparseSegmentSqrtN op. +// indices: indices passed to the corresponding SparseSegmentSqrtN op. +// segment_ids: segment_ids passed to the corresponding SparseSegmentSqrtN op. +// output_dim0: dimension 0 of "data" passed to SparseSegmentSqrtN op. +func SparseSegmentSqrtNGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSegmentSqrtNGrad", + Input: []tf.Input{ + grad, indices, segment_ids, output_dim0, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// LRNGradAttr is an optional argument to LRNGrad.
+type LRNGradAttr func(optionalAttr) + +// LRNGradDepthRadius sets the optional depth_radius attribute to value. +// +// value: A depth radius. +// If not specified, defaults to 5 +func LRNGradDepthRadius(value int64) LRNGradAttr { + return func(m optionalAttr) { + m["depth_radius"] = value + } +} + +// LRNGradBias sets the optional bias attribute to value. +// +// value: An offset (usually > 0 to avoid dividing by 0). +// If not specified, defaults to 1 +func LRNGradBias(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["bias"] = value + } +} + +// LRNGradAlpha sets the optional alpha attribute to value. +// +// value: A scale factor, usually positive. +// If not specified, defaults to 1 +func LRNGradAlpha(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["alpha"] = value + } +} + +// LRNGradBeta sets the optional beta attribute to value. +// +// value: An exponent. +// If not specified, defaults to 0.5 +func LRNGradBeta(value float32) LRNGradAttr { + return func(m optionalAttr) { + m["beta"] = value + } +} + +// Gradients for Local Response Normalization. +// +// Arguments: +// input_grads: 4-D with shape `[batch, height, width, channels]`. +// input_image: 4-D with shape `[batch, height, width, channels]`. +// output_image: 4-D with shape `[batch, height, width, channels]`. +// +// Returns The gradients for LRN. +func LRNGrad(scope *Scope, input_grads tf.Output, input_image tf.Output, output_image tf.Output, optional ...LRNGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LRNGrad", + Input: []tf.Input{ + input_grads, input_image, output_image, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AnyAttr is an optional argument to Any. +type AnyAttr func(optionalAttr) + +// AnyKeepDims sets the optional keep_dims attribute to value. 
+// +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func AnyKeepDims(value bool) AnyAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the "logical or" of elements across dimensions of a tensor. +// +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. +// +// Arguments: +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. +// +// Returns The reduced tensor. +func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Any", + Input: []tf.Input{ + input, axis, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// DestroyResourceOpAttr is an optional argument to DestroyResourceOp. +type DestroyResourceOpAttr func(optionalAttr) + +// DestroyResourceOpIgnoreLookupError sets the optional ignore_lookup_error attribute to value. +// +// value: whether to ignore the error when the resource +// doesn't exist. +// If not specified, defaults to true +func DestroyResourceOpIgnoreLookupError(value bool) DestroyResourceOpAttr { + return func(m optionalAttr) { + m["ignore_lookup_error"] = value + } +} + +// Deletes the resource specified by the handle. +// +// All subsequent operations using the resource will result in a NotFound +// error status. +// +// Arguments: +// resource: handle to the resource to delete. +// +// Returns the created operation. 
+func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyResourceOpAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DestroyResourceOp", + Input: []tf.Input{ + resource, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Generates values in an interval. +// +// A sequence of `num` evenly-spaced values are generated beginning at `start`. +// If `num > 1`, the values in the sequence increase by `(stop - start) / (num - 1)`, +// so that the last one is exactly `stop`. +// +// For example: +// +// ``` +// tf.linspace(10.0, 12.0, 3, name="linspace") => [ 10.0 11.0 12.0] +// ``` +// +// Arguments: +// start: 0-D tensor. First entry in the range. +// stop: 0-D tensor. Last entry in the range. +// num: 0-D tensor. Number of values to generate. +// +// Returns 1-D. The generated values. +func LinSpace(scope *Scope, start tf.Output, stop tf.Output, num tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LinSpace", + Input: []tf.Input{ + start, stop, num, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ComplexAttr is an optional argument to Complex. +type ComplexAttr func(optionalAttr) + +// ComplexTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_COMPLEX64 +func ComplexTout(value tf.DataType) ComplexAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Converts two real numbers to a complex number. +// +// Given a tensor `real` representing the real part of a complex number, and a +// tensor `imag` representing the imaginary part of a complex number, this +// operation returns complex numbers elementwise of the form \\(a + bj\\), where +// *a* represents the `real` part and *b* represents the `imag` part. +// +// The input tensors `real` and `imag` must have the same shape.
+// +// For example: +// +// ``` +// # tensor 'real' is [2.25, 3.25] +// # tensor `imag` is [4.75, 5.75] +// tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]] +// ``` +func Complex(scope *Scope, real tf.Output, imag tf.Output, optional ...ComplexAttr) (out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Complex", + Input: []tf.Input{ + real, imag, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ImagAttr is an optional argument to Imag. +type ImagAttr func(optionalAttr) + +// ImagTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func ImagTout(value tf.DataType) ImagAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Returns the imaginary part of a complex number. +// +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the imaginary part of each element in `input`. All +// elements in `input` must be complex numbers of the form \\(a + bj\\), where *a* +// is the real part and *b* is the imaginary part returned by this operation. +// +// For example: +// +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.imag(input) ==> [4.75, 5.75] +// ``` +func Imag(scope *Scope, input tf.Output, optional ...ImagAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Imag", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes hyperbolic tangent of `x` element-wise. 
+func Tanh(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Tanh", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the maximum along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_docs/python/tf/math#Segmentation) +// for an explanation of segments. +// +// Computes a tensor such that +// \\(output_i = \max_j(data_j)\\) where `max` is over `j` such +// that `segment_ids[j] == i`. +// +// If the max is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// For example: +// +// ``` +// c = tf.constant([[1,2,3,4], [4, 3, 2, 1], [5,6,7,8]]) +// tf.segment_max(c, tf.constant([0, 0, 1])) +// # ==> [[4, 3, 3, 4], +// # [5, 6, 7, 8]] +// ``` +// +// +// Arguments: +// +// segment_ids: A 1-D tensor whose size is equal to the size of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMax(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentMax", + Input: []tf.Input{ + data, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that skips `count` elements from the `input_dataset`. +// +// Arguments: +// +// count: A scalar representing the number of elements from the `input_dataset` +// that should be skipped. If count is -1, skips everything. +// +// +func SkipDataset(scope *Scope, input_dataset tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "SkipDataset", + Input: []tf.Input{ + input_dataset, count, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// VarHandleOpAttr is an optional argument to VarHandleOp. +type VarHandleOpAttr func(optionalAttr) + +// VarHandleOpContainer sets the optional container attribute to value. +// +// value: the container this variable is placed in. +// If not specified, defaults to "" +func VarHandleOpContainer(value string) VarHandleOpAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// VarHandleOpSharedName sets the optional shared_name attribute to value. 
+// +// value: the name by which this variable is referred to. +// If not specified, defaults to "" +func VarHandleOpSharedName(value string) VarHandleOpAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Creates a handle to a Variable resource. +// +// Arguments: +// dtype: the type of this variable. Must agree with the dtypes +// of all ops using this variable. +// shape: The (possibly partially specified) shape of this variable. +func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...VarHandleOpAttr) (resource tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype, "shape": shape} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "VarHandleOp", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AngleAttr is an optional argument to Angle. +type AngleAttr func(optionalAttr) + +// AngleTout sets the optional Tout attribute to value. +// If not specified, defaults to DT_FLOAT +func AngleTout(value tf.DataType) AngleAttr { + return func(m optionalAttr) { + m["Tout"] = value + } +} + +// Returns the argument of a complex number. +// +// Given a tensor `input` of complex numbers, this operation returns a tensor of +// type `float` that is the argument of each element in `input`. All elements in +// `input` must be complex numbers of the form \\(a + bj\\), where *a* +// is the real part and *b* is the imaginary part. +// +// The argument returned by this operation is of the form \\(atan2(b, a)\\). +// +// For example: +// +// ``` +// # tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j] +// tf.angle(input) ==> [2.0132, 1.056] +// ``` +// +// @compatibility(numpy) +// Equivalent to np.angle. 
+// @end_compatibility +func Angle(scope *Scope, input tf.Output, optional ...AngleAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Angle", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Clips tensor values to a specified min and max. +// +// Given a tensor `t`, this operation returns a tensor of the same type and +// shape as `t` with its values clipped to `clip_value_min` and `clip_value_max`. +// Any values less than `clip_value_min` are set to `clip_value_min`. Any values +// greater than `clip_value_max` are set to `clip_value_max`. +// +// Arguments: +// t: A `Tensor`. +// clip_value_min: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape +// as `t`. The minimum value to clip by. +// clip_value_max: A 0-D (scalar) `Tensor`, or a `Tensor` with the same shape +// as `t`. The maximum value to clip by. +// +// Returns A clipped `Tensor` with the same shape as input 't'. +func ClipByValue(scope *Scope, t tf.Output, clip_value_min tf.Output, clip_value_max tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ClipByValue", + Input: []tf.Input{ + t, clip_value_min, clip_value_max, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Counts the number of occurrences of each value in an integer array. +// +// Outputs a vector with length `size` and the same dtype as `weights`. If +// `weights` are empty, then index `i` stores the number of times the value `i` is +// counted in `arr`. If `weights` are non-empty, then index `i` stores the sum of +// the value in `weights` at each index where the corresponding value in `arr` is +// `i`. +// +// Values in `arr` outside of the range [0, size) are ignored. +// +// Arguments: +// arr: int32 `Tensor`. +// size: non-negative int32 scalar `Tensor`. 
+// weights: is an int32, int64, float32, or float64 `Tensor` with the same +// shape as `arr`, or a length-0 `Tensor`, in which case it acts as all weights +// equal to 1. +// +// Returns 1D `Tensor` with length equal to `size`. The counts or summed weights for +// each value in the range [0, size). +func Bincount(scope *Scope, arr tf.Output, size tf.Output, weights tf.Output) (bins tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Bincount", + Input: []tf.Input{ + arr, size, weights, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// CumsumAttr is an optional argument to Cumsum. +type CumsumAttr func(optionalAttr) + +// CumsumExclusive sets the optional exclusive attribute to value. +// +// value: If `True`, perform exclusive cumsum. +// If not specified, defaults to false +func CumsumExclusive(value bool) CumsumAttr { + return func(m optionalAttr) { + m["exclusive"] = value + } +} + +// CumsumReverse sets the optional reverse attribute to value. +// +// value: A `bool` (default: False). +// If not specified, defaults to false +func CumsumReverse(value bool) CumsumAttr { + return func(m optionalAttr) { + m["reverse"] = value + } +} + +// Compute the cumulative sum of the tensor `x` along `axis`. +// +// By default, this op performs an inclusive cumsum, which means that the first +// element of the input is identical to the first element of the output: +// +// ```python +// tf.cumsum([a, b, c]) # => [a, a + b, a + b + c] +// ``` +// +// By setting the `exclusive` kwarg to `True`, an exclusive cumsum is +// performed instead: +// +// ```python +// tf.cumsum([a, b, c], exclusive=True) # => [0, a, a + b] +// ``` +// +// By setting the `reverse` kwarg to `True`, the cumsum is performed in the +// opposite direction: +// +// ```python +// tf.cumsum([a, b, c], reverse=True) # => [a + b + c, b + c, c] +// ``` +// +// This is more efficient than using separate `tf.reverse` ops. 
+// +// The `reverse` and `exclusive` kwargs can also be combined: +// +// ```python +// tf.cumsum([a, b, c], exclusive=True, reverse=True) # => [b + c, c, 0] +// ``` +// +// Arguments: +// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, +// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, +// `complex128`, `qint8`, `quint8`, `qint32`, `half`. +// axis: A `Tensor` of type `int32` (default: 0). Must be in the range +// `[-rank(x), rank(x))`. +func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) (out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Cumsum", + Input: []tf.Input{ + x, axis, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Return the shape of s0 op s1 with broadcast. +// +// Given `s0` and `s1`, tensors that represent shapes, compute `r0`, the +// broadcasted shape. `s0`, `s1` and `r0` are all integer vectors. +func BroadcastArgs(scope *Scope, s0 tf.Output, s1 tf.Output) (r0 tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BroadcastArgs", + Input: []tf.Input{ + s0, s1, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// DataFormatDimMapAttr is an optional argument to DataFormatDimMap. +type DataFormatDimMapAttr func(optionalAttr) + +// DataFormatDimMapSrcFormat sets the optional src_format attribute to value. +// +// value: source data format. +// If not specified, defaults to "NHWC" +func DataFormatDimMapSrcFormat(value string) DataFormatDimMapAttr { + return func(m optionalAttr) { + m["src_format"] = value + } +} + +// DataFormatDimMapDstFormat sets the optional dst_format attribute to value. +// +// value: destination data format. 
+// If not specified, defaults to "NCHW" +func DataFormatDimMapDstFormat(value string) DataFormatDimMapAttr { + return func(m optionalAttr) { + m["dst_format"] = value + } +} + +// Returns the dimension index in the destination data format given the one in +// +// the source data format. +// +// Arguments: +// x: A Tensor with each element as a dimension index in source data format. +// Must be in the range [-4, 4). +// +// Returns A Tensor with each element as a dimension index in destination data format. +func DataFormatDimMap(scope *Scope, x tf.Output, optional ...DataFormatDimMapAttr) (y tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "DataFormatDimMap", + Input: []tf.Input{ + x, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// CumprodAttr is an optional argument to Cumprod. +type CumprodAttr func(optionalAttr) + +// CumprodExclusive sets the optional exclusive attribute to value. +// +// value: If `True`, perform exclusive cumprod. +// If not specified, defaults to false +func CumprodExclusive(value bool) CumprodAttr { + return func(m optionalAttr) { + m["exclusive"] = value + } +} + +// CumprodReverse sets the optional reverse attribute to value. +// +// value: A `bool` (default: False). +// If not specified, defaults to false +func CumprodReverse(value bool) CumprodAttr { + return func(m optionalAttr) { + m["reverse"] = value + } +} + +// Compute the cumulative product of the tensor `x` along `axis`. 
+// +// By default, this op performs an inclusive cumprod, which means that the first +// element of the input is identical to the first element of the output: +// +// ```python +// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] +// ``` +// +// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is +// performed instead: +// +// ```python +// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] +// ``` +// +// By setting the `reverse` kwarg to `True`, the cumprod is performed in the +// opposite direction: +// +// ```python +// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] +// ``` +// +// This is more efficient than using separate `tf.reverse` ops. +// +// The `reverse` and `exclusive` kwargs can also be combined: +// +// ```python +// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] +// ``` +// +// Arguments: +// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, +// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, +// `complex128`, `qint8`, `quint8`, `qint32`, `half`. +// axis: A `Tensor` of type `int32` (default: 0). Must be in the range +// `[-rank(x), rank(x))`. +func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Cumprod", + Input: []tf.Input{ + x, axis, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr is an optional argument to RetrieveTPUEmbeddingStochasticGradientDescentParameters. +type RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr func(optionalAttr) + +// RetrieveTPUEmbeddingStochasticGradientDescentParametersTableId sets the optional table_id attribute to value. 
+// If not specified, defaults to -1 +// +// REQUIRES: value >= -1 +func RetrieveTPUEmbeddingStochasticGradientDescentParametersTableId(value int64) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { + return func(m optionalAttr) { + m["table_id"] = value + } +} + +// RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName sets the optional table_name attribute to value. +// If not specified, defaults to "" +func RetrieveTPUEmbeddingStochasticGradientDescentParametersTableName(value string) RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr { + return func(m optionalAttr) { + m["table_name"] = value + } +} + +// Retrieve SGD embedding parameters. +// +// An op that retrieves optimization parameters from embedding to host +// memory. Must be preceded by a ConfigureTPUEmbeddingHost op that sets up +// the correct embedding table configuration. For example, this op is +// used to retrieve updated parameters before saving a checkpoint. +// +// Returns Parameter parameters updated by the stochastic gradient descent optimization algorithm. +func RetrieveTPUEmbeddingStochasticGradientDescentParameters(scope *Scope, num_shards int64, shard_id int64, optional ...RetrieveTPUEmbeddingStochasticGradientDescentParametersAttr) (parameters tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_shards": num_shards, "shard_id": shard_id} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RetrieveTPUEmbeddingStochasticGradientDescentParameters", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// QuantizedMatMulAttr is an optional argument to QuantizedMatMul. +type QuantizedMatMulAttr func(optionalAttr) + +// QuantizedMatMulToutput sets the optional Toutput attribute to value. 
+// If not specified, defaults to DT_QINT32 +func QuantizedMatMulToutput(value tf.DataType) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["Toutput"] = value + } +} + +// QuantizedMatMulTransposeA sets the optional transpose_a attribute to value. +// +// value: If true, `a` is transposed before multiplication. +// If not specified, defaults to false +func QuantizedMatMulTransposeA(value bool) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["transpose_a"] = value + } +} + +// QuantizedMatMulTransposeB sets the optional transpose_b attribute to value. +// +// value: If true, `b` is transposed before multiplication. +// If not specified, defaults to false +func QuantizedMatMulTransposeB(value bool) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["transpose_b"] = value + } +} + +// QuantizedMatMulTactivation sets the optional Tactivation attribute to value. +// +// value: The type of output produced by activation function +// following this operation. +// If not specified, defaults to DT_QUINT8 +func QuantizedMatMulTactivation(value tf.DataType) QuantizedMatMulAttr { + return func(m optionalAttr) { + m["Tactivation"] = value + } +} + +// Perform a quantized matrix multiplication of `a` by the matrix `b`. +// +// The inputs must be two-dimensional matrices and the inner dimension of +// `a` (after being transposed if `transpose_a` is non-zero) must match the +// outer dimension of `b` (after being transposed if `transposed_b` is +// non-zero). +// +// Arguments: +// a: Must be a two-dimensional tensor. +// b: Must be a two-dimensional tensor. +// min_a: The float value that the lowest quantized `a` value represents. +// max_a: The float value that the highest quantized `a` value represents. +// min_b: The float value that the lowest quantized `b` value represents. +// max_b: The float value that the highest quantized `b` value represents. 
+// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +func QuantizedMatMul(scope *Scope, a tf.Output, b tf.Output, min_a tf.Output, max_a tf.Output, min_b tf.Output, max_b tf.Output, optional ...QuantizedMatMulAttr) (out tf.Output, min_out tf.Output, max_out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizedMatMul", + Input: []tf.Input{ + a, b, min_a, max_a, min_b, max_b, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// QuantizedMulAttr is an optional argument to QuantizedMul. +type QuantizedMulAttr func(optionalAttr) + +// QuantizedMulToutput sets the optional Toutput attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedMulToutput(value tf.DataType) QuantizedMulAttr { + return func(m optionalAttr) { + m["Toutput"] = value + } +} + +// Returns x * y element-wise, working on quantized buffers. +// +// Arguments: +// +// +// min_x: The float value that the lowest quantized `x` value represents. +// max_x: The float value that the highest quantized `x` value represents. +// min_y: The float value that the lowest quantized `y` value represents. +// max_y: The float value that the highest quantized `y` value represents. +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +// +// *NOTE*: `QuantizedMul` supports limited forms of broadcasting. 
More about +// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func QuantizedMul(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedMulAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizedMul", + Input: []tf.Input{ + x, y, min_x, max_x, min_y, max_y, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// QuantizedAddAttr is an optional argument to QuantizedAdd. +type QuantizedAddAttr func(optionalAttr) + +// QuantizedAddToutput sets the optional Toutput attribute to value. +// If not specified, defaults to DT_QINT32 +func QuantizedAddToutput(value tf.DataType) QuantizedAddAttr { + return func(m optionalAttr) { + m["Toutput"] = value + } +} + +// Returns x + y element-wise, working on quantized buffers. +// +// Arguments: +// +// +// min_x: The float value that the lowest quantized `x` value represents. +// max_x: The float value that the highest quantized `x` value represents. +// min_y: The float value that the lowest quantized `y` value represents. +// max_y: The float value that the highest quantized `y` value represents. +// +// Returns The float value that the lowest quantized output value represents.The float value that the highest quantized output value represents. +// +// *NOTE*: `QuantizedAdd` supports limited forms of broadcasting. 
More about +// broadcasting [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func QuantizedAdd(scope *Scope, x tf.Output, y tf.Output, min_x tf.Output, max_x tf.Output, min_y tf.Output, max_y tf.Output, optional ...QuantizedAddAttr) (z tf.Output, min_z tf.Output, max_z tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "QuantizedAdd", + Input: []tf.Input{ + x, y, min_x, max_x, min_y, max_y, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// Given a quantized tensor described by (input, input_min, input_max), outputs a +// +// range that covers the actual values present in that tensor. This op is +// typically used to produce the requested_output_min and requested_output_max for +// Requantize. +// +// Arguments: +// +// input_min: The float value that the minimum quantized input value represents. +// input_max: The float value that the maximum quantized input value represents. +// +// Returns The computed min output.the computed max output. +func RequantizationRange(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output) (output_min tf.Output, output_max tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "RequantizationRange", + Input: []tf.Input{ + input, input_min, input_max, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Rolls the elements of a tensor along an axis. +// +// The elements are shifted positively (towards larger indices) by the offset of +// `shift` along the dimension of `axis`. Negative `shift` values will shift +// elements in the opposite direction. Elements that roll passed the last position +// will wrap around to the first and vice versa. Multiple shifts along multiple +// axes may be specified. 
+// +// For example: +// +// ``` +// # 't' is [0, 1, 2, 3, 4] +// roll(t, shift=2, axis=0) ==> [3, 4, 0, 1, 2] +// +// # shifting along multiple dimensions +// # 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] +// roll(t, shift=[1, -2], axis=[0, 1]) ==> [[7, 8, 9, 5, 6], [2, 3, 4, 0, 1]] +// +// # shifting along the same axis multiple times +// # 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] +// roll(t, shift=[2, -3], axis=[1, 1]) ==> [[1, 2, 3, 4, 0], [6, 7, 8, 9, 5]] +// ``` +// +// Arguments: +// +// shift: Dimension must be 0-D or 1-D. `shift[i]` specifies the number of places by which +// elements are shifted positively (towards larger indices) along the dimension +// specified by `axis[i]`. Negative shifts will roll the elements in the opposite +// direction. +// axis: Dimension must be 0-D or 1-D. `axis[i]` specifies the dimension that the shift +// `shift[i]` should occur. If the same axis is referenced more than once, the +// total shift for that axis will be the sum of all the shifts that belong to that +// axis. +// +// Returns Has the same shape and size as the input. The elements are shifted +// positively (towards larger indices) by the offsets of `shift` along the +// dimensions of `axis`. +func Roll(scope *Scope, input tf.Output, shift tf.Output, axis tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Roll", + Input: []tf.Input{ + input, shift, axis, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Updates the table to associates keys with values. +// +// The tensor `keys` must be of the same type as the keys of the table. +// The tensor `values` must be of the type of the table values. +// +// Arguments: +// table_handle: Handle to the table. +// keys: Any shape. Keys to look up. +// values: Values to associate with keys. +// +// Returns the created operation. 
+func LookupTableInsertV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LookupTableInsertV2", + Input: []tf.Input{ + table_handle, keys, values, + }, + } + return scope.AddOperation(opspec) +} + +// Creates a `Dataset` that includes only 1/`num_shards` of this dataset. +// +// Arguments: +// +// num_shards: An integer representing the number of shards operating in parallel. +// index: An integer representing the current worker index. +// +// +func ShardDataset(scope *Scope, input_dataset tf.Output, num_shards tf.Output, index tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "ShardDataset", + Input: []tf.Input{ + input_dataset, num_shards, index, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that batches and pads `batch_size` elements from the input. +// +// Arguments: +// +// batch_size: A scalar representing the number of elements to accumulate in a +// batch. +// padded_shapes: A list of int64 tensors representing the desired padded shapes +// of the corresponding output components. These shapes may be partially +// specified, using `-1` to indicate that a particular dimension should be +// padded to the maximum size of all batch elements. +// padding_values: A list of scalars containing the padding value to use for +// each of the outputs. +// drop_remainder: A scalar representing whether the last batch should be dropped in case its size +// is smaller than desired. 
+// +func PaddedBatchDatasetV2(scope *Scope, input_dataset tf.Output, batch_size tf.Output, padded_shapes []tf.Output, padding_values []tf.Output, drop_remainder tf.Output, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "PaddedBatchDatasetV2", + Input: []tf.Input{ + input_dataset, batch_size, tf.OutputList(padded_shapes), tf.OutputList(padding_values), drop_remainder, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns element-wise smallest integer not less than x. +func Ceil(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Ceil", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the number of elements in the given table. +// +// Arguments: +// table_handle: Handle to the table. +// +// Returns Scalar that contains number of elements in the table. +func LookupTableSizeV2(scope *Scope, table_handle tf.Output) (size tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "LookupTableSizeV2", + Input: []tf.Input{ + table_handle, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResizeBilinearGradAttr is an optional argument to ResizeBilinearGrad. +type ResizeBilinearGradAttr func(optionalAttr) + +// ResizeBilinearGradAlignCorners sets the optional align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and grad tensors are +// aligned. Defaults to false. +// If not specified, defaults to false +func ResizeBilinearGradAlignCorners(value bool) ResizeBilinearGradAttr { + return func(m optionalAttr) { + m["align_corners"] = value + } +} + +// Computes the gradient of bilinear interpolation. +// +// Arguments: +// grads: 4-D with shape `[batch, height, width, channels]`. 
+// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, +// The image tensor that was resized. +// +// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. +// Gradients with respect to the input image. Input image must have been +// float or double. +func ResizeBilinearGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBilinearGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResizeBilinearGrad", + Input: []tf.Input{ + grads, original_image, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs all keys and values in the table. +// +// Arguments: +// table_handle: Handle to the table. +// +// +// +// Returns Vector of all keys present in the table.Tensor of all values in the table. Indexed in parallel with `keys`. +func LookupTableExportV2(scope *Scope, table_handle tf.Output, Tkeys tf.DataType, Tvalues tf.DataType) (keys tf.Output, values tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"Tkeys": Tkeys, "Tvalues": Tvalues} + opspec := tf.OpSpec{ + Type: "LookupTableExportV2", + Input: []tf.Input{ + table_handle, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// MultiDeviceIteratorFromStringHandleAttr is an optional argument to MultiDeviceIteratorFromStringHandle. +type MultiDeviceIteratorFromStringHandleAttr func(optionalAttr) + +// MultiDeviceIteratorFromStringHandleOutputTypes sets the optional output_types attribute to value. +// +// value: The type list for the return values. 
+// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func MultiDeviceIteratorFromStringHandleOutputTypes(value []tf.DataType) MultiDeviceIteratorFromStringHandleAttr { + return func(m optionalAttr) { + m["output_types"] = value + } +} + +// MultiDeviceIteratorFromStringHandleOutputShapes sets the optional output_shapes attribute to value. +// +// value: The list of shapes being produced. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func MultiDeviceIteratorFromStringHandleOutputShapes(value []tf.Shape) MultiDeviceIteratorFromStringHandleAttr { + return func(m optionalAttr) { + m["output_shapes"] = value + } +} + +// Generates a MultiDeviceIterator resource from its provided string handle. +// +// Arguments: +// string_handle: String representing the resource. +// +// Returns A MultiDeviceIterator resource. +func MultiDeviceIteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ...MultiDeviceIteratorFromStringHandleAttr) (multi_device_iterator tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MultiDeviceIteratorFromStringHandle", + Input: []tf.Input{ + string_handle, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// MutableHashTableV2Attr is an optional argument to MutableHashTableV2. +type MutableHashTableV2Attr func(optionalAttr) + +// MutableHashTableV2Container sets the optional container attribute to value. +// +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func MutableHashTableV2Container(value string) MutableHashTableV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MutableHashTableV2SharedName sets the optional shared_name attribute to value. 
+// +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func MutableHashTableV2SharedName(value string) MutableHashTableV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// MutableHashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// +// value: If true and shared_name is empty, the table is shared +// using the node name. +// If not specified, defaults to false +func MutableHashTableV2UseNodeNameSharing(value bool) MutableHashTableV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value + } +} + +// Creates an empty hash table. +// +// This op creates a mutable hash table, specifying the type of its keys and +// values. Each value must be a scalar. Data can be inserted into the table using +// the insert operations. It does not support the initialization operation. +// +// Arguments: +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. +// +// Returns Handle to a table. +func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableV2Attr) (table_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MutableHashTableV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// DequantizeAttr is an optional argument to Dequantize. +type DequantizeAttr func(optionalAttr) + +// DequantizeMode sets the optional mode attribute to value. +// If not specified, defaults to "MIN_COMBINED" +func DequantizeMode(value string) DequantizeAttr { + return func(m optionalAttr) { + m["mode"] = value + } +} + +// Dequantize the 'input' tensor into a float Tensor. 
+// +// [min_range, max_range] are scalar floats that specify the range for +// the 'input' data. The 'mode' attribute controls exactly which calculations are +// used to convert the float values to their quantized equivalents. +// +// In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: +// +// ``` +// if T == qint8: in[i] += (range(T) + 1)/ 2.0 +// out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) +// ``` +// here `range(T) = numeric_limits::max() - numeric_limits::min()` +// +// *MIN_COMBINED Mode Example* +// +// If the input comes from a QuantizedRelu6, the output type is +// quint8 (range of 0-255) but the possible range of QuantizedRelu6 is +// 0-6. The min_range and max_range values are therefore 0.0 and 6.0. +// Dequantize on quint8 will take each value, cast to float, and multiply +// by 6 / 255. +// Note that if quantizedtype is qint8, the operation will additionally add +// each value by 128 prior to casting. +// +// If the mode is 'MIN_FIRST', then this approach is used: +// +// ```c++ +// num_discrete_values = 1 << (# of bits in T) +// range_adjust = num_discrete_values / (num_discrete_values - 1) +// range = (range_max - range_min) * range_adjust +// range_scale = range / num_discrete_values +// const double offset_input = static_cast(input) - lowest_quantized; +// result = range_min + ((input - numeric_limits::min()) * range_scale) +// ``` +// +// *SCALED mode Example* +// +// `SCALED` mode matches the quantization approach used in +// `QuantizeAndDequantize{V2|V3}`. +// +// If the mode is `SCALED`, we do not use the full range of the output type, +// choosing to elide the lowest possible value for symmetry (e.g., output range is +// -127 to 127, not -128 to 127 for signed 8 bit quantization), so that 0.0 maps to +// 0. +// +// We first find the range of values in our tensor. 
The +// range we use is always centered on 0, so we find m such that +// ```c++ +// m = max(abs(input_min), abs(input_max)) +// ``` +// +// Our input tensor range is then `[-m, m]`. +// +// Next, we choose our fixed-point quantization buckets, `[min_fixed, max_fixed]`. +// If T is signed, this is +// ``` +// num_bits = sizeof(T) * 8 +// [min_fixed, max_fixed] = +// [-(1 << (num_bits - 1) - 1), (1 << (num_bits - 1)) - 1] +// ``` +// +// Otherwise, if T is unsigned, the fixed-point range is +// ``` +// [min_fixed, max_fixed] = [0, (1 << num_bits) - 1] +// ``` +// +// From this we compute our scaling factor, s: +// ```c++ +// s = (2 * m) / (max_fixed - min_fixed) +// ``` +// +// Now we can dequantize the elements of our tensor: +// ```c++ +// result = input * s +// ``` +// +// Arguments: +// +// min_range: The minimum scalar value possibly produced for the input. +// max_range: The maximum scalar value possibly produced for the input. +func Dequantize(scope *Scope, input tf.Output, min_range tf.Output, max_range tf.Output, optional ...DequantizeAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Dequantize", + Input: []tf.Input{ + input, min_range, max_range, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Flips all bits elementwise. +// +// The result will have exactly those bits set, that are not set in `x`. The +// computation is performed on the underlying representation of x. +func Invert(scope *Scope, x tf.Output) (y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Invert", + Input: []tf.Input{ + x, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Deserialize bucket boundaries and ready flag into current QuantileAccumulator. 
+// +// An op that deserializes bucket boundaries and are boundaries ready flag into current QuantileAccumulator. +// +// Arguments: +// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. +// bucket_boundaries: float; List of Rank 1 Tensors each containing the bucket boundaries for a feature. +// +// Returns the created operation. +func BoostedTreesQuantileStreamResourceDeserialize(scope *Scope, quantile_stream_resource_handle tf.Output, bucket_boundaries []tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BoostedTreesQuantileStreamResourceDeserialize", + Input: []tf.Input{ + quantile_stream_resource_handle, tf.OutputList(bucket_boundaries), + }, + } + return scope.AddOperation(opspec) +} + +// Inverse 3D fast Fourier transform. +// +// Computes the inverse 3-dimensional discrete Fourier transform over the +// inner-most 3 dimensions of `input`. +// +// Arguments: +// input: A complex64 tensor. +// +// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 +// dimensions of `input` are replaced with their inverse 3D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.ifftn with 3 dimensions. +// @end_compatibility +func IFFT3D(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "IFFT3D", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Shuts down a running distributed TPU system. +// +// The op returns an error if no system is running. +// +// Returns the created operation. +func ShutdownDistributedTPU(scope *Scope) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ShutdownDistributedTPU", + } + return scope.AddOperation(opspec) +} + +// Deprecated. Disallowed in GraphDef version >= 2. 
+// +// DEPRECATED at GraphDef version 2: Use AdjustContrastv2 instead +func AdjustContrast(scope *Scope, images tf.Output, contrast_factor tf.Output, min_value tf.Output, max_value tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "AdjustContrast", + Input: []tf.Input{ + images, contrast_factor, min_value, max_value, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Table initializer that takes two tensors for keys and values respectively. +// +// Arguments: +// table_handle: Handle to a table which will be initialized. +// keys: Keys of type Tkey. +// values: Values of type Tval. +// +// Returns the created operation. +func InitializeTableV2(scope *Scope, table_handle tf.Output, keys tf.Output, values tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "InitializeTableV2", + Input: []tf.Input{ + table_handle, keys, values, + }, + } + return scope.AddOperation(opspec) +} + +// PrintAttr is an optional argument to Print. +type PrintAttr func(optionalAttr) + +// PrintMessage sets the optional message attribute to value. +// +// value: A string, prefix of the error message. +// If not specified, defaults to "" +func PrintMessage(value string) PrintAttr { + return func(m optionalAttr) { + m["message"] = value + } +} + +// PrintFirstN sets the optional first_n attribute to value. +// +// value: Only log `first_n` number of times. -1 disables logging. +// If not specified, defaults to -1 +func PrintFirstN(value int64) PrintAttr { + return func(m optionalAttr) { + m["first_n"] = value + } +} + +// PrintSummarize sets the optional summarize attribute to value. +// +// value: Only print this many entries of each tensor. +// If not specified, defaults to 3 +func PrintSummarize(value int64) PrintAttr { + return func(m optionalAttr) { + m["summarize"] = value + } +} + +// Prints a list of tensors. 
+// +// Passes `input` through to `output` and prints `data` when evaluating. +// +// Arguments: +// input: The tensor passed to `output` +// data: A list of tensors to print out when op is evaluated. +// +// Returns = The unmodified `input` tensor +func Print(scope *Scope, input tf.Output, data []tf.Output, optional ...PrintAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Print", + Input: []tf.Input{ + input, tf.OutputList(data), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs a `Summary` protocol buffer with a tensor and per-plugin data. +// +// Arguments: +// tag: A string attached to this summary. Used for organization in TensorBoard. +// tensor: A tensor to serialize. +// serialized_summary_metadata: A serialized SummaryMetadata proto. Contains plugin +// data. +func TensorSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, serialized_summary_metadata tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorSummaryV2", + Input: []tf.Input{ + tag, tensor, serialized_summary_metadata, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that asynchronously prefetches elements from `input_dataset`. +// +// Arguments: +// +// buffer_size: The maximum number of elements to buffer in an iterator over +// this dataset. 
+// +// +func PrefetchDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "PrefetchDataset", + Input: []tf.Input{ + input_dataset, buffer_size, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// TensorSummaryAttr is an optional argument to TensorSummary. +type TensorSummaryAttr func(optionalAttr) + +// TensorSummaryDescription sets the optional description attribute to value. +// +// value: A json-encoded SummaryDescription proto. +// If not specified, defaults to "" +func TensorSummaryDescription(value string) TensorSummaryAttr { + return func(m optionalAttr) { + m["description"] = value + } +} + +// TensorSummaryLabels sets the optional labels attribute to value. +// +// value: An unused list of strings. +// If not specified, defaults to <> +func TensorSummaryLabels(value []string) TensorSummaryAttr { + return func(m optionalAttr) { + m["labels"] = value + } +} + +// TensorSummaryDisplayName sets the optional display_name attribute to value. +// +// value: An unused string. +// If not specified, defaults to "" +func TensorSummaryDisplayName(value string) TensorSummaryAttr { + return func(m optionalAttr) { + m["display_name"] = value + } +} + +// Outputs a `Summary` protocol buffer with a tensor. +// +// This op is being phased out in favor of TensorSummaryV2, which lets callers pass +// a tag as well as a serialized SummaryMetadata proto string that contains +// plugin-specific data. We will keep this op to maintain backwards compatibility. +// +// Arguments: +// tensor: A tensor to serialize. 
+func TensorSummary(scope *Scope, tensor tf.Output, optional ...TensorSummaryAttr) (summary tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "TensorSummary", + Input: []tf.Input{ + tensor, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Read an element from the TensorArray into output `value`. +// +// Arguments: +// handle: The handle to a TensorArray. +// +// flow_in: A float scalar that enforces proper chaining of operations. +// dtype: The type of the elem that is returned. +// +// Returns The tensor that is read from the TensorArray. +func TensorArrayReadV3(scope *Scope, handle tf.Output, index tf.Output, flow_in tf.Output, dtype tf.DataType) (value tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtype": dtype} + opspec := tf.OpSpec{ + Type: "TensorArrayReadV3", + Input: []tf.Input{ + handle, index, flow_in, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Reduces sparse updates into the variable referenced by `resource` using the `max` operation. +// +// This operation computes +// +// # Scalar indices +// ref[indices, ...] = max(ref[indices, ...], updates[...]) +// +// # Vector indices (for each i) +// ref[indices[i], ...] = max(ref[indices[i], ...], updates[i, ...]) +// +// # High rank indices (for each i, ..., j) +// ref[indices[i, ..., j], ...] = max(ref[indices[i, ..., j], ...], updates[i, ..., j, ...]) +// +// Duplicate entries are handled correctly: if multiple `indices` reference +// the same location, their contributions are combined. +// +// Requires `updates.shape = indices.shape + ref.shape[1:]` or `updates.shape = []`. +// +//
+// +//
+// +// Arguments: +// resource: Should be from a `Variable` node. +// indices: A tensor of indices into the first dimension of `ref`. +// updates: A tensor of updated values to add to `ref`. +// +// Returns the created operation. +func ResourceScatterMax(scope *Scope, resource tf.Output, indices tf.Output, updates tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ResourceScatterMax", + Input: []tf.Input{ + resource, indices, updates, + }, + } + return scope.AddOperation(opspec) +} + +// Computes the gradient for the tanh of `x` wrt its input. +// +// Specifically, `grad = dy * (1 - y*y)`, where `y = tanh(x)`, and `dy` +// is the corresponding input gradient. +func TanhGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TanhGrad", + Input: []tf.Input{ + y, dy, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Outputs a `Summary` protocol buffer with scalar values. +// +// The input `tags` and `values` must have the same shape. The generated summary +// has a summary value for each tag-value pair in `tags` and `values`. +// +// Arguments: +// tags: Tags for the summary. +// values: Same shape as `tags. Values for the summary. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func ScalarSummary(scope *Scope, tags tf.Output, values tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ScalarSummary", + Input: []tf.Input{ + tags, values, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ImageSummaryAttr is an optional argument to ImageSummary. +type ImageSummaryAttr func(optionalAttr) + +// ImageSummaryMaxImages sets the optional max_images attribute to value. +// +// value: Max number of batch elements to generate images for. 
+// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func ImageSummaryMaxImages(value int64) ImageSummaryAttr { + return func(m optionalAttr) { + m["max_images"] = value + } +} + +// ImageSummaryBadColor sets the optional bad_color attribute to value. +// +// value: Color to use for pixels with non-finite values. +// If not specified, defaults to > int_val:255 int_val:0 int_val:0 int_val:255 > +func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { + return func(m optionalAttr) { + m["bad_color"] = value + } +} + +// Outputs a `Summary` protocol buffer with images. +// +// The summary has up to `max_images` summary values containing images. The +// images are built from `tensor` which must be 4-D with shape `[batch_size, +// height, width, channels]` and where `channels` can be: +// +// * 1: `tensor` is interpreted as Grayscale. +// * 3: `tensor` is interpreted as RGB. +// * 4: `tensor` is interpreted as RGBA. +// +// The images have the same number of channels as the input tensor. For float +// input, the values are normalized one image at a time to fit in the range +// `[0, 255]`. `uint8` values are unchanged. The op uses two different +// normalization algorithms: +// +// * If the input values are all positive, they are rescaled so the largest one +// is 255. +// +// * If any input value is negative, the values are shifted so input value 0.0 +// is at 127. They are then rescaled so that either the smallest value is 0, +// or the largest one is 255. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_images` is 1, the summary value tag is '*tag*/image'. +// * If `max_images` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. +// +// The `bad_color` argument is the color to use in the generated images for +// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. 
+// Each element must be in the range `[0, 255]` (It represents the value of a +// pixel in the output image). Non-finite values in the input tensor are +// replaced by this tensor in the output image. The default value is the color +// red. +// +// Arguments: +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 4-D of shape `[batch_size, height, width, channels]` where +// `channels` is 1, 3, or 4. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func ImageSummary(scope *Scope, tag tf.Output, tensor tf.Output, optional ...ImageSummaryAttr) (summary tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ImageSummary", + Input: []tf.Input{ + tag, tensor, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AudioSummaryV2Attr is an optional argument to AudioSummaryV2. +type AudioSummaryV2Attr func(optionalAttr) + +// AudioSummaryV2MaxOutputs sets the optional max_outputs attribute to value. +// +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func AudioSummaryV2MaxOutputs(value int64) AudioSummaryV2Attr { + return func(m optionalAttr) { + m["max_outputs"] = value + } +} + +// Outputs a `Summary` protocol buffer with audio. +// +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. 
+// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// +// Arguments: +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. +// +// Returns Scalar. Serialized `Summary` protocol buffer. +func AudioSummaryV2(scope *Scope, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...AudioSummaryV2Attr) (summary tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AudioSummaryV2", + Input: []tf.Input{ + tag, tensor, sample_rate, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Splits a tensor into a list. +// +// list[i] corresponds to lengths[i] tensors from the input tensor. +// The tensor must have rank at least 1 and contain exactly sum(lengths) elements. +// +// tensor: The input tensor. +// element_shape: A shape compatible with that of elements in the tensor. +// lengths: Vector of sizes of the 0th dimension of tensors in the list. +// output_handle: The list. +func TensorListSplit(scope *Scope, tensor tf.Output, element_shape tf.Output, lengths tf.Output) (output_handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorListSplit", + Input: []tf.Input{ + tensor, element_shape, lengths, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AvgPoolAttr is an optional argument to AvgPool. +type AvgPoolAttr func(optionalAttr) + +// AvgPoolDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. 
+// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func AvgPoolDataFormat(value string) AvgPoolAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Performs average pooling on the input. +// +// Each entry in `output` is the mean of the corresponding size `ksize` +// window in `value`. +// +// Arguments: +// value: 4-D with shape `[batch, height, width, channels]`. +// ksize: The size of the sliding window for each dimension of `value`. +// strides: The stride of the sliding window for each dimension of `value`. +// padding: The type of padding algorithm to use. +// +// Returns The average pooled output tensor. +func AvgPool(scope *Scope, value tf.Output, ksize []int64, strides []int64, padding string, optional ...AvgPoolAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AvgPool", + Input: []tf.Input{ + value, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Merges summaries. +// +// This op creates a +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// protocol buffer that contains the union of all the values in the input +// summaries. +// +// When the Op is run, it reports an `InvalidArgument` error if multiple values +// in the summaries to merge use the same tag. +// +// Arguments: +// inputs: Can be of any shape. Each must contain serialized `Summary` protocol +// buffers. +// +// Returns Scalar. Serialized `Summary` protocol buffer. 
+func MergeSummary(scope *Scope, inputs []tf.Output) (summary tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "MergeSummary", + Input: []tf.Input{ + tf.OutputList(inputs), + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// The shape of the elements of the given list, as a tensor. +// +// input_handle: the list +// element_shape: the shape of elements of the list +func TensorListElementShape(scope *Scope, input_handle tf.Output, shape_type tf.DataType) (element_shape tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"shape_type": shape_type} + opspec := tf.OpSpec{ + Type: "TensorListElementShape", + Input: []tf.Input{ + input_handle, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns the item in the list with the given index. +// +// input_handle: the list +// index: the position in the list from which an element will be retrieved +// item: the element at that position +// +// +func TensorListGetItem(scope *Scope, input_handle tf.Output, index tf.Output, element_shape tf.Output, element_dtype tf.DataType) (item tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"element_dtype": element_dtype} + opspec := tf.OpSpec{ + Type: "TensorListGetItem", + Input: []tf.Input{ + input_handle, index, element_shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Resizes the list. +// +// +// input_handle: the input list +// size: size of the output list +// +func TensorListResize(scope *Scope, input_handle tf.Output, size tf.Output) (output_handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorListResize", + Input: []tf.Input{ + input_handle, size, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns a diagonal tensor with a given diagonal values. 
+// +// Given a `diagonal`, this operation returns a tensor with the `diagonal` and +// everything else padded with zeros. The diagonal is computed as follows: +// +// Assume `diagonal` has dimensions [D1,..., Dk], then the output is a tensor of +// rank 2k with dimensions [D1,..., Dk, D1,..., Dk] where: +// +// `output[i1,..., ik, i1,..., ik] = diagonal[i1, ..., ik]` and 0 everywhere else. +// +// For example: +// +// ``` +// # 'diagonal' is [1, 2, 3, 4] +// tf.diag(diagonal) ==> [[1, 0, 0, 0] +// [0, 2, 0, 0] +// [0, 0, 3, 0] +// [0, 0, 0, 4]] +// ``` +// +// Arguments: +// diagonal: Rank k tensor where k is at most 1. +func Diag(scope *Scope, diagonal tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Diag", + Input: []tf.Input{ + diagonal, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ParameterizedTruncatedNormalAttr is an optional argument to ParameterizedTruncatedNormal. +type ParameterizedTruncatedNormalAttr func(optionalAttr) + +// ParameterizedTruncatedNormalSeed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func ParameterizedTruncatedNormalSeed(value int64) ParameterizedTruncatedNormalAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// ParameterizedTruncatedNormalSeed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func ParameterizedTruncatedNormalSeed2(value int64) ParameterizedTruncatedNormalAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Outputs random values from a normal distribution. 
The parameters may each be a +// +// scalar which applies to the entire output, or a vector of length shape[0] which +// stores the parameters for each batch. +// +// Arguments: +// shape: The shape of the output tensor. Batches are indexed by the 0th dimension. +// means: The mean parameter of each batch. +// stdevs: The standard deviation parameter of each batch. Must be greater than 0. +// minvals: The minimum cutoff. May be -infinity. +// maxvals: The maximum cutoff. May be +infinity, and must be more than the minval +// for each batch. +// +// Returns A matrix of shape num_batches x samples_per_batch, filled with random +// truncated normal values using the parameters for each row. +func ParameterizedTruncatedNormal(scope *Scope, shape tf.Output, means tf.Output, stdevs tf.Output, minvals tf.Output, maxvals tf.Output, optional ...ParameterizedTruncatedNormalAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ParameterizedTruncatedNormal", + Input: []tf.Input{ + shape, means, stdevs, minvals, maxvals, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Sets the index-th position of the list to contain the given tensor. +// +// input_handle: the list +// index: the position in the list to which the tensor will be assigned +// item: the element to be assigned to that position +// output_handle: the new list, with the element in the proper position +// +func TensorListSetItem(scope *Scope, input_handle tf.Output, index tf.Output, item tf.Output) (output_handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorListSetItem", + Input: []tf.Input{ + input_handle, index, item, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a TensorList by indexing into a Tensor. 
+// +// Each member of the TensorList corresponds to one row of the input tensor, +// specified by the given index (see `tf.gather`). +// +// tensor: The input tensor. +// indices: The indices used to index into the list. +// element_shape: The shape of the elements in the list (can be less specified than +// the shape of the tensor). +// output_handle: The TensorList. +func TensorListScatter(scope *Scope, tensor tf.Output, indices tf.Output, element_shape tf.Output) (output_handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorListScatter", + Input: []tf.Input{ + tensor, indices, element_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Deprecated. Use TensorArrayScatterV3 +// +// DEPRECATED at GraphDef version 26: Use TensorArrayScatterV3 +func TensorArrayScatterV2(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TensorArrayScatterV2", + Input: []tf.Input{ + handle, indices, value, flow_in, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// AsStringAttr is an optional argument to AsString. +type AsStringAttr func(optionalAttr) + +// AsStringPrecision sets the optional precision attribute to value. +// +// value: The post-decimal precision to use for floating point numbers. +// Only used if precision > -1. +// If not specified, defaults to -1 +func AsStringPrecision(value int64) AsStringAttr { + return func(m optionalAttr) { + m["precision"] = value + } +} + +// AsStringScientific sets the optional scientific attribute to value. +// +// value: Use scientific notation for floating point numbers. +// If not specified, defaults to false +func AsStringScientific(value bool) AsStringAttr { + return func(m optionalAttr) { + m["scientific"] = value + } +} + +// AsStringShortest sets the optional shortest attribute to value. 
+// +// value: Use shortest representation (either scientific or standard) for +// floating point numbers. +// If not specified, defaults to false +func AsStringShortest(value bool) AsStringAttr { + return func(m optionalAttr) { + m["shortest"] = value + } +} + +// AsStringWidth sets the optional width attribute to value. +// +// value: Pad pre-decimal numbers to this width. +// Applies to both floating point and integer numbers. +// Only used if width > -1. +// If not specified, defaults to -1 +func AsStringWidth(value int64) AsStringAttr { + return func(m optionalAttr) { + m["width"] = value + } +} + +// AsStringFill sets the optional fill attribute to value. +// +// value: The value to pad if width > -1. If empty, pads with spaces. +// Another typical value is '0'. String cannot be longer than 1 character. +// If not specified, defaults to "" +func AsStringFill(value string) AsStringAttr { + return func(m optionalAttr) { + m["fill"] = value + } +} + +// Converts each entry in the given tensor to strings. Supports many numeric +// +// types and boolean. +func AsString(scope *Scope, input tf.Output, optional ...AsStringAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "AsString", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns a `RaggedTensor` containing the specified sequences of numbers. +// +// +// Returns a `RaggedTensor` `result` composed from `rt_dense_values` and +// `rt_nested_splits`, such that +// `result[i] = range(starts[i], limits[i], deltas[i])`. +// +// ```python +// >>> (rt_nested_splits, rt_dense_values) = gen_ragged_ops.ragged_range( +// ... 
starts=[2, 5, 8], limits=[3, 5, 12], deltas=1) +// >>> result = ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits) +// >>> print result.eval().tolist() +// [[2], # result[0] = range(2, 3) +// [], # result[1] = range(5, 5) +// [8, 9, 10, 11]] # result[2] = range(8, 12) +// ``` +// +// The input tensors `starts`, `limits`, and `deltas` may be scalars or vectors. +// The vector inputs must all have the same size. Scalar inputs are broadcast +// to match the size of the vector inputs. +// +// Arguments: +// starts: The starts of each range. +// limits: The limits of each range. +// deltas: The deltas of each range. +// +// Returns The `row_splits` for the returned `RaggedTensor`.The `flat_values` for the returned `RaggedTensor`. +func RaggedRange(scope *Scope, starts tf.Output, limits tf.Output, deltas tf.Output) (rt_nested_splits tf.Output, rt_dense_values tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "RaggedRange", + Input: []tf.Input{ + starts, limits, deltas, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Deprecated, use python implementation tf.linalg.matrix_exponential. +// +// DEPRECATED at GraphDef version 27: Use Python implementation tf.linalg.matrix_exponential instead. +func MatrixExponential(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "MatrixExponential", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the Cholesky decomposition of one or more square matrices. +// +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. +// +// The input has to be symmetric and positive definite. Only the lower-triangular +// part of the input will be used for this operation. The upper-triangular part +// will not be read. 
+// +// The output is a tensor of the same shape as the input +// containing the Cholesky decompositions for all input submatrices `[..., :, :]`. +// +// **Note**: The gradient computation on GPU is faster for large matrices but +// not for large batch dimensions when the submatrices are small. In this +// case it might be faster to use the CPU. +// +// Arguments: +// input: Shape is `[..., M, M]`. +// +// Returns Shape is `[..., M, M]`. +func Cholesky(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Cholesky", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Writes contents to the file at input filename. Creates file and recursively +// +// creates directory if not existing. +// +// Arguments: +// filename: scalar. The name of the file to which we write the contents. +// contents: scalar. The content to be written to the output file. +// +// Returns the created operation. +func WriteFile(scope *Scope, filename tf.Output, contents tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "WriteFile", + Input: []tf.Input{ + filename, contents, + }, + } + return scope.AddOperation(opspec) +} + +// AllAttr is an optional argument to All. +type AllAttr func(optionalAttr) + +// AllKeepDims sets the optional keep_dims attribute to value. +// +// value: If true, retain reduced dimensions with length 1. +// If not specified, defaults to false +func AllKeepDims(value bool) AllAttr { + return func(m optionalAttr) { + m["keep_dims"] = value + } +} + +// Computes the "logical and" of elements across dimensions of a tensor. +// +// Reduces `input` along the dimensions given in `axis`. Unless +// `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in +// `axis`. If `keep_dims` is true, the reduced dimensions are +// retained with length 1. 
+// +// Arguments: +// input: The tensor to reduce. +// axis: The dimensions to reduce. Must be in the range +// `[-rank(input), rank(input))`. +// +// Returns The reduced tensor. +func All(scope *Scope, input tf.Output, axis tf.Output, optional ...AllAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "All", + Input: []tf.Input{ + input, axis, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the Eigen Decomposition of a batch of square self-adjoint matrices. +// +// DEPRECATED at GraphDef version 11: Use SelfAdjointEigV2 instead. +// +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices, with the same constraints as the single matrix +// SelfAdjointEig. +// +// The result is a [..., M+1, M] matrix with [..., 0,:] containing the +// eigenvalues, and subsequent [...,1:, :] containing the eigenvectors. The eigenvalues +// are sorted in non-decreasing order. +// +// Arguments: +// input: Shape is `[..., M, M]`. +// +// Returns Shape is `[..., M+1, M]`. +func SelfAdjointEig(scope *Scope, input tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SelfAdjointEig", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes softplus gradients for a softplus operation. +// +// Arguments: +// gradients: The backpropagated gradients to the corresponding softplus operation. +// features: The features passed as input to the corresponding softplus operation. +// +// Returns The gradients: `gradients / (1 + exp(-features))`. 
+func SoftplusGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SoftplusGrad", + Input: []tf.Input{ + gradients, features, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Solves tridiagonal systems of equations. +// +// `diagonals` is a tensor of shape `[..., 3, M]` whose inner-most 2 dimensions +// represent matrices with three rows being the superdiagonal, diagonals, and +// subdiagonals, in order. The last element of the superdiagonal and the first +// element of the subdiagonal is ignored. +// `rhs` is a tensor of shape `[..., M, K]`, representing K right-hand sides per +// each left-hand side. +// The output is a tensor of shape `[..., M, K]` containing the solutions. +// +// Arguments: +// diagonals: Shape is `[..., 3, M]`. +// rhs: Shape is `[..., M, K]`. +// +// Returns Shape is `[..., M, K]`. +func TridiagonalSolve(scope *Scope, diagonals tf.Output, rhs tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TridiagonalSolve", Input: []tf.Input{ - images, boxes, + diagonals, rhs, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Gets the next output from the given iterator. +// SelfAdjointEigV2Attr is an optional argument to SelfAdjointEigV2. +type SelfAdjointEigV2Attr func(optionalAttr) + +// SelfAdjointEigV2ComputeV sets the optional compute_v attribute to value. // -// This operation is a synchronous version IteratorGetNext. It should only be used -// in situations where the iterator does not block the calling thread, or where -// the calling thread is not a member of the thread pool used to execute parallel -// operations (e.g. in eager mode). -func IteratorGetNextSync(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { +// value: If `True` then eigenvectors will be computed and returned in `v`. 
+// Otherwise, only the eigenvalues will be computed. +// If not specified, defaults to true +func SelfAdjointEigV2ComputeV(value bool) SelfAdjointEigV2Attr { + return func(m optionalAttr) { + m["compute_v"] = value + } +} + +// Computes the eigen decomposition of one or more square self-adjoint matrices. +// +// Computes the eigenvalues and (optionally) eigenvectors of each inner matrix in +// `input` such that `input[..., :, :] = v[..., :, :] * diag(e[..., :])`. The eigenvalues +// are sorted in non-decreasing order. +// +// ```python +// # a is a tensor. +// # e is a tensor of eigenvalues. +// # v is a tensor of eigenvectors. +// e, v = self_adjoint_eig(a) +// e = self_adjoint_eig(a, compute_v=False) +// ``` +// +// Arguments: +// input: `Tensor` input of shape `[N, N]`. +// +// Returns Eigenvalues. Shape is `[N]`.Eigenvectors. Shape is `[N, N]`. +func SelfAdjointEigV2(scope *Scope, input tf.Output, optional ...SelfAdjointEigV2Attr) (e tf.Output, v tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "IteratorGetNextSync", + Type: "SelfAdjointEigV2", Input: []tf.Input{ - iterator, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// Adjust the saturation of one or more images. +// +// `images` is a tensor of at least 3 dimensions. The last dimension is +// interpretted as channels, and must be three. +// +// The input image is considered in the RGB colorspace. Conceptually, the RGB +// colors are first mapped into HSV. A scale is then applied all the saturation +// values, and then remapped back to RGB colorspace. +// +// Arguments: +// images: Images to adjust. At least 3-D. +// scale: A float scale to add to the saturation. +// +// Returns The hue-adjusted image or images. 
+func AdjustSaturation(scope *Scope, images tf.Output, scale tf.Output) (output tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("IteratorGetNextSync", err) - return + opspec := tf.OpSpec{ + Type: "AdjustSaturation", + Input: []tf.Input{ + images, scale, + }, } - return components + op := scope.AddOperation(opspec) + return op.Output(0) } -// SampleDistortedBoundingBoxV2Attr is an optional argument to SampleDistortedBoundingBoxV2. -type SampleDistortedBoundingBoxV2Attr func(optionalAttr) +// MatrixSolveAttr is an optional argument to MatrixSolve. +type MatrixSolveAttr func(optionalAttr) -// SampleDistortedBoundingBoxV2Seed sets the optional seed attribute to value. +// MatrixSolveAdjoint sets the optional adjoint attribute to value. // -// value: If either `seed` or `seed2` are set to non-zero, the random number -// generator is seeded by the given `seed`. Otherwise, it is seeded by a random -// seed. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxV2Seed(value int64) SampleDistortedBoundingBoxV2Attr { +// value: Boolean indicating whether to solve with `matrix` or its (block-wise) +// adjoint. +// If not specified, defaults to false +func MatrixSolveAdjoint(value bool) MatrixSolveAttr { return func(m optionalAttr) { - m["seed"] = value + m["adjoint"] = value } } -// SampleDistortedBoundingBoxV2Seed2 sets the optional seed2 attribute to value. +// Solves systems of linear equations. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func SampleDistortedBoundingBoxV2Seed2(value int64) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// SampleDistortedBoundingBoxV2AspectRatioRange sets the optional aspect_ratio_range attribute to value. +// `Matrix` is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. 
`Rhs` is a tensor of shape `[..., M, K]`. The `output` is +// a tensor shape `[..., M, K]`. If `adjoint` is `False` then each output matrix +// satisfies `matrix[..., :, :] * output[..., :, :] = rhs[..., :, :]`. +// If `adjoint` is `True` then each output matrix satisfies +// `adjoint(matrix[..., :, :]) * output[..., :, :] = rhs[..., :, :]`. // -// value: The cropped area of the image must have an aspect ratio = -// width / height within this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["aspect_ratio_range"] = value - } -} - -// SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value. +// Arguments: +// matrix: Shape is `[..., M, M]`. +// rhs: Shape is `[..., M, K]`. // -// value: The cropped area of the image must contain a fraction of the -// supplied image within this range. -// If not specified, defaults to -func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr { - return func(m optionalAttr) { - m["area_range"] = value +// Returns Shape is `[..., M, K]`. +func MatrixSolve(scope *Scope, matrix tf.Output, rhs tf.Output, optional ...MatrixSolveAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MatrixSolve", + Input: []tf.Input{ + matrix, rhs, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// SampleDistortedBoundingBoxV2MaxAttempts sets the optional max_attempts attribute to value. +// ResourceApplyKerasMomentumAttr is an optional argument to ResourceApplyKerasMomentum. +type ResourceApplyKerasMomentumAttr func(optionalAttr) + +// ResourceApplyKerasMomentumUseLocking sets the optional use_locking attribute to value. 
// -// value: Number of attempts at generating a cropped region of the image -// of the specified constraints. After `max_attempts` failures, return the entire -// image. -// If not specified, defaults to 100 -func SampleDistortedBoundingBoxV2MaxAttempts(value int64) SampleDistortedBoundingBoxV2Attr { +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyKerasMomentumUseLocking(value bool) ResourceApplyKerasMomentumAttr { return func(m optionalAttr) { - m["max_attempts"] = value + m["use_locking"] = value } } -// SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value. +// ResourceApplyKerasMomentumUseNesterov sets the optional use_nesterov attribute to value. // -// value: Controls behavior if no bounding boxes supplied. -// If true, assume an implicit bounding box covering the whole input. If false, -// raise an error. +// value: If `True`, the tensor passed to compute grad will be +// var + momentum * accum, so in the end, the var you get is actually +// var + momentum * accum. // If not specified, defaults to false -func SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxV2Attr { +func ResourceApplyKerasMomentumUseNesterov(value bool) ResourceApplyKerasMomentumAttr { return func(m optionalAttr) { - m["use_image_if_no_bounding_boxes"] = value + m["use_nesterov"] = value } } -// Generate a single randomly distorted bounding box for an image. -// -// Bounding box annotations are often supplied in addition to ground-truth labels -// in image recognition or object localization tasks. A common technique for -// training such a system is to randomly distort an image while preserving -// its content, i.e. *data augmentation*. 
This Op outputs a randomly distorted -// localization of an object, i.e. bounding box, given an `image_size`, -// `bounding_boxes` and a series of constraints. -// -// The output of this Op is a single bounding box that may be used to crop the -// original image. The output is returned as 3 tensors: `begin`, `size` and -// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the -// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize -// what the bounding box looks like. -// -// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. -// -// For example, -// -// ```python -// # Generate a single distorted bounding box. -// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( -// tf.shape(image), -// bounding_boxes=bounding_boxes) -// -// # Draw the bounding box in an image summary. -// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), -// bbox_for_draw) -// tf.summary.image('images_with_box', image_with_box) +// Update '*var' according to the momentum scheme. Set use_nesterov = True if you // -// # Employ the bounding box to distort the image. -// distorted_image = tf.slice(image, begin, size) -// ``` +// want to use Nesterov momentum. // -// Note that if no bounding box information is available, setting -// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit -// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is -// false and no bounding boxes are supplied, an error is raised. +// accum = accum * momentum - lr * grad +// var += accum // // Arguments: -// image_size: 1-D, containing `[height, width, channels]`. -// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes -// associated with the image. 
-// min_object_covered: The cropped area of the image must contain at least this -// fraction of any bounding box supplied. The value of this parameter should be -// non-negative. In the case of 0, the cropped area does not need to overlap -// any of the bounding boxes supplied. +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// grad: The gradient. +// momentum: Momentum. Must be a scalar. // -// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to -// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to -// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box. -// Provide as input to `tf.image.draw_bounding_boxes`. -func SampleDistortedBoundingBoxV2(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, min_object_covered tf.Output, optional ...SampleDistortedBoundingBoxV2Attr) (begin tf.Output, size tf.Output, bboxes tf.Output) { +// Returns the created operation. +func ResourceApplyKerasMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyKerasMomentumAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -32137,126 +34837,83 @@ func SampleDistortedBoundingBoxV2(scope *Scope, image_size tf.Output, bounding_b a(attrs) } opspec := tf.OpSpec{ - Type: "SampleDistortedBoundingBoxV2", + Type: "ResourceApplyKerasMomentum", Input: []tf.Input{ - image_size, bounding_boxes, min_object_covered, + var_, accum, lr, grad, momentum, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return scope.AddOperation(opspec) } -// Computes requantization range per channel. +// Returns a serialized GraphDef representing `input_dataset`. +// +// Returns a graph representation for `input_dataset`. // // Arguments: -// input: The original input tensor. 
-// input_min: The minimum value of the input tensor -// input_max: The maximum value of the input tensor. -// clip_value_max: The maximum value of the output that needs to be clipped. -// Example: set this to 6 for Relu6. +// input_dataset: A variant tensor representing the dataset to return the graph representation for. // -// Returns The minimum value of the final output tensorThe maximum value of the final output tensor. -func RequantizationRangePerChannel(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, clip_value_max float32) (output_min tf.Output, output_max tf.Output) { +// Returns The graph representation of the dataset (as serialized GraphDef). +func DatasetToGraph(scope *Scope, input_dataset tf.Output) (graph tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"clip_value_max": clip_value_max} opspec := tf.OpSpec{ - Type: "RequantizationRangePerChannel", + Type: "DatasetToGraph", Input: []tf.Input{ - input, input_min, input_max, + input_dataset, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - -// ExtractGlimpseAttr is an optional argument to ExtractGlimpse. -type ExtractGlimpseAttr func(optionalAttr) - -// ExtractGlimpseCentered sets the optional centered attribute to value. -// -// value: indicates if the offset coordinates are centered relative to -// the image, in which case the (0, 0) offset is relative to the center -// of the input images. If false, the (0,0) offset corresponds to the -// upper left corner of the input images. -// If not specified, defaults to true -func ExtractGlimpseCentered(value bool) ExtractGlimpseAttr { - return func(m optionalAttr) { - m["centered"] = value - } + return op.Output(0) } -// ExtractGlimpseNormalized sets the optional normalized attribute to value. -// -// value: indicates if the offset coordinates are normalized. 
-// If not specified, defaults to true -func ExtractGlimpseNormalized(value bool) ExtractGlimpseAttr { - return func(m optionalAttr) { - m["normalized"] = value - } -} +// LuAttr is an optional argument to Lu. +type LuAttr func(optionalAttr) -// ExtractGlimpseUniformNoise sets the optional uniform_noise attribute to value. -// -// value: indicates if the noise should be generated using a -// uniform distribution or a Gaussian distribution. -// If not specified, defaults to true -func ExtractGlimpseUniformNoise(value bool) ExtractGlimpseAttr { +// LuOutputIdxType sets the optional output_idx_type attribute to value. +// If not specified, defaults to DT_INT32 +func LuOutputIdxType(value tf.DataType) LuAttr { return func(m optionalAttr) { - m["uniform_noise"] = value + m["output_idx_type"] = value } } -// ExtractGlimpseNoise sets the optional noise attribute to value. +// Computes the LU decomposition of one or more square matrices. // -// value: indicates if the noise should `uniform`, `gaussian`, or -// `zero`. The default is `uniform` which means the the noise type -// will be decided by `uniform_noise`. -// If not specified, defaults to "uniform" -func ExtractGlimpseNoise(value string) ExtractGlimpseAttr { - return func(m optionalAttr) { - m["noise"] = value - } -} - -// Extracts a glimpse from the input tensor. +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. // -// Returns a set of windows called glimpses extracted at location -// `offsets` from the input tensor. If the windows only partially -// overlaps the inputs, the non overlapping areas will be filled with -// random noise. +// The input has to be invertible. // -// The result is a 4-D tensor of shape `[batch_size, glimpse_height, -// glimpse_width, channels]`. The channels and batch dimensions are the -// same as that of the input tensor. The height and width of the output -// windows are specified in the `size` parameter. 
+// The output consists of two tensors LU and P containing the LU decomposition +// of all input submatrices `[..., :, :]`. LU encodes the lower triangular and +// upper triangular factors. // -// The argument `normalized` and `centered` controls how the windows are built: +// For each input submatrix of shape `[M, M]`, L is a lower triangular matrix of +// shape `[M, M]` with unit diagonal whose entries correspond to the strictly lower +// triangular part of LU. U is an upper triangular matrix of shape `[M, M]` whose +// entries correspond to the upper triangular part, including the diagonal, of LU. // -// * If the coordinates are normalized but not centered, 0.0 and 1.0 -// correspond to the minimum and maximum of each height and width -// dimension. -// * If the coordinates are both normalized and centered, they range from -// -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper -// left corner, the lower right corner is located at (1.0, 1.0) and the -// center is at (0, 0). -// * If the coordinates are not normalized they are interpreted as -// numbers of pixels. +// P represents a permutation matrix encoded as a list of indices each between `0` +// and `M-1`, inclusive. If P_mat denotes the permutation matrix corresponding to +// P, then L, U and P satisfy P_mat * input = L * U. // // Arguments: -// input: A 4-D float tensor of shape `[batch_size, height, width, channels]`. -// size: A 1-D tensor of 2 elements containing the size of the glimpses -// to extract. The glimpse height must be specified first, following -// by the glimpse width. -// offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing -// the y, x locations of the center of each window. +// input: A tensor of shape `[..., M, M]` whose inner-most 2 dimensions form matrices of +// size `[M, M]`. // -// Returns A tensor representing the glimpses `[batch_size, -// glimpse_height, glimpse_width, channels]`.
-func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Output, optional ...ExtractGlimpseAttr) (glimpse tf.Output) { +// Returns A tensor of shape `[..., M, M]` whose strictly lower triangular part denotes the +// lower triangular factor `L` with unit diagonal, and whose upper triangular part +// denotes the upper triangular factor `U`.Permutation of the rows encoded as a list of indices in `0..M-1`. Shape is +// `[..., M]`. +// @compatibility(scipy) +// Similar to `scipy.linalg.lu`, except the triangular factors `L` and `U` are +// packed into a single tensor, the permutation is applied to `input` instead of +// the right hand side and the permutation `P` is returned as a list of indices +// instead of a permutation matrix. +// @end_compatibility +func Lu(scope *Scope, input tf.Output, optional ...LuAttr) (lu tf.Output, p tf.Output) { if scope.Err() != nil { return } @@ -32265,55 +34922,61 @@ func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Ou a(attrs) } opspec := tf.OpSpec{ - Type: "ExtractGlimpse", + Type: "Lu", Input: []tf.Input{ - input, size, offsets, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// A container for an iterator resource. +// Deprecated. Use TensorArrayCloseV3 // -// Returns A handle to the iterator that can be passed to a "MakeIterator" -// or "IteratorGetNext" op. -func Iterator(scope *Scope, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// DEPRECATED at GraphDef version 26: Use TensorArrayCloseV3 +// +// Returns the created operation. 
+func TensorArrayCloseV2(scope *Scope, handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "Iterator", - - Attrs: attrs, + Type: "TensorArrayCloseV2", + Input: []tf.Input{ + handle, + }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// TensorForestTreeResourceHandleOpAttr is an optional argument to TensorForestTreeResourceHandleOp. -type TensorForestTreeResourceHandleOpAttr func(optionalAttr) - -// TensorForestTreeResourceHandleOpContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func TensorForestTreeResourceHandleOpContainer(value string) TensorForestTreeResourceHandleOpAttr { - return func(m optionalAttr) { - m["container"] = value - } -} +// EncodeBase64Attr is an optional argument to EncodeBase64. +type EncodeBase64Attr func(optionalAttr) -// TensorForestTreeResourceHandleOpSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func TensorForestTreeResourceHandleOpSharedName(value string) TensorForestTreeResourceHandleOpAttr { +// EncodeBase64Pad sets the optional pad attribute to value. +// +// value: Bool whether padding is applied at the ends. +// If not specified, defaults to false +func EncodeBase64Pad(value bool) EncodeBase64Attr { return func(m optionalAttr) { - m["shared_name"] = value + m["pad"] = value } } -// Creates a handle to a TensorForestTreeResource -func TensorForestTreeResourceHandleOp(scope *Scope, optional ...TensorForestTreeResourceHandleOpAttr) (resource tf.Output) { +// Encode strings into web-safe base64 format. +// +// Refer to the following article for more information on base64 format: +// en.wikipedia.org/wiki/Base64. 
Base64 strings may have padding with '=' at the +// end so that the encoded string has a length that is a multiple of 4. See the Padding section of the +// link above. +// +// Web-safe means that the encoder uses - and _ instead of + and /. +// +// Arguments: +// input: Strings to be encoded. +// +// Returns Input strings encoded in base64. +func EncodeBase64(scope *Scope, input tf.Output, optional ...EncodeBase64Attr) (output tf.Output) { if scope.Err() != nil { return } @@ -32322,181 +34985,226 @@ func TensorForestTreeResourceHandleOp(scope *Scope, optional ...TensorForestTree a(attrs) } opspec := tf.OpSpec{ - Type: "TensorForestTreeResourceHandleOp", - + Type: "EncodeBase64", + Input: []tf.Input{ + input, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage. -type CropAndResizeGradImageAttr func(optionalAttr) - -// CropAndResizeGradImageMethod sets the optional method attribute to value. +// A dataset that creates window datasets from the input dataset. // -// value: A string specifying the interpolation method. Only 'bilinear' is -// supported for now. -// If not specified, defaults to "bilinear" -func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr { - return func(m optionalAttr) { - m["method"] = value +// Arguments: +// +// size: A scalar representing the number of elements to accumulate in a window. +// shift: A scalar representing the steps moving the sliding window forward in one +// iteration. It must be positive. +// stride: A scalar representing the stride of the input elements of the sliding window. +// It must be positive. +// drop_remainder: A scalar representing whether a window should be dropped in case its size is +// smaller than desired.
+// +// +func WindowDataset(scope *Scope, input_dataset tf.Output, size tf.Output, shift tf.Output, stride tf.Output, drop_remainder tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "WindowDataset", + Input: []tf.Input{ + input_dataset, size, shift, stride, drop_remainder, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the gradient of the crop_and_resize op wrt the input image tensor. +// Computes the matrix square root of one or more square matrices: +// +// matmul(sqrtm(A), sqrtm(A)) = A +// +// The input matrix should be invertible. If the input matrix is real, it should +// have no eigenvalues which are real and negative (pairs of complex conjugate +// eigenvalues are allowed). +// +// The matrix square root is computed by first reducing the matrix to +// quasi-triangular form with the real Schur decomposition. The square root +// of the quasi-triangular matrix is then computed directly. Details of +// the algorithm can be found in: Nicholas J. Higham, "Computing real +// square roots of a real matrix", Linear Algebra Appl., 1987. +// +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. The output is a tensor of the same shape as the input +// containing the matrix square root for all input submatrices `[..., :, :]`. // // Arguments: -// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. 
A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. -// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. -// image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]` -// containing the original image size. Both `image_height` and `image_width` need -// to be positive. +// input: Shape is `[..., M, M]`. // +// Returns Shape is `[..., M, M]`. // -// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) { +// @compatibility(scipy) +// Equivalent to scipy.linalg.sqrtm +// @end_compatibility +func MatrixSquareRoot(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"T": T} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "CropAndResizeGradImage", + Type: "MatrixSquareRoot", Input: []tf.Input{ - grads, boxes, box_ind, image_size, + input, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// ShuffleDatasetAttr is an optional argument to ShuffleDataset. -type ShuffleDatasetAttr func(optionalAttr) +// SvdAttr is an optional argument to Svd. 
+type SvdAttr func(optionalAttr) -// ShuffleDatasetReshuffleEachIteration sets the optional reshuffle_each_iteration attribute to value. +// SvdComputeUv sets the optional compute_uv attribute to value. // -// value: If true, each iterator over this dataset will be given -// a different pseudorandomly generated seed, based on a sequence seeded by the -// `seed` and `seed2` inputs. If false, each iterator will be given the same -// seed, and repeated iteration over this dataset will yield the exact same -// sequence of results. +// value: If true, left and right singular vectors will be +// computed and returned in `u` and `v`, respectively. +// If false, `u` and `v` are not set and should never be referenced. // If not specified, defaults to true -func ShuffleDatasetReshuffleEachIteration(value bool) ShuffleDatasetAttr { +func SvdComputeUv(value bool) SvdAttr { return func(m optionalAttr) { - m["reshuffle_each_iteration"] = value + m["compute_uv"] = value } } -// Creates a dataset that shuffles elements from `input_dataset` pseudorandomly. +// SvdFullMatrices sets the optional full_matrices attribute to value. // -// Arguments: +// value: If true, compute full-sized `u` and `v`. If false +// (the default), compute only the leading `P` singular vectors. +// Ignored if `compute_uv` is `False`. +// If not specified, defaults to false +func SvdFullMatrices(value bool) SvdAttr { + return func(m optionalAttr) { + m["full_matrices"] = value + } +} + +// Computes the singular value decompositions of one or more matrices. // -// buffer_size: The number of output elements to buffer in an iterator over -// this dataset. Compare with the `min_after_dequeue` attr when creating a -// `RandomShuffleQueue`. -// seed: A scalar seed for the random number generator. If either `seed` or -// `seed2` is set to be non-zero, the random number generator is seeded -// by the given seed. Otherwise, a random seed is used. -// seed2: A second scalar seed to avoid seed collision.
+// Computes the SVD of each inner matrix in `input` such that +// `input[..., :, :] = u[..., :, :] * diag(s[..., :, :]) * transpose(v[..., :, :])` +// +// ```python +// # a is a tensor containing a batch of matrices. +// # s is a tensor of singular values for each matrix. +// # u is the tensor containing the left singular vectors for each matrix. +// # v is the tensor containing the right singular vectors for each matrix. +// s, u, v = svd(a) +// s, _, _ = svd(a, compute_uv=False) +// ``` // +// Arguments: +// input: A tensor of shape `[..., M, N]` whose inner-most 2 dimensions +// form matrices of size `[M, N]`. Let `P` be the minimum of `M` and `N`. // -func ShuffleDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ShuffleDatasetAttr) (handle tf.Output) { +// Returns Singular values. Shape is `[..., P]`. Left singular vectors. If `full_matrices` is `False` then shape is +// `[..., M, P]`; if `full_matrices` is `True` then shape is +// `[..., M, M]`. Undefined if `compute_uv` is `False`. Right singular vectors. If `full_matrices` is `False` then shape is +// `[..., N, P]`. If `full_matrices` is `True` then shape is `[..., N, N]`. +// Undefined if `compute_uv` is false. +func Svd(scope *Scope, input tf.Output, optional ...SvdAttr) (s tf.Output, u tf.Output, v tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ShuffleDataset", + Type: "Svd", Input: []tf.Input{ - input_dataset, buffer_size, seed, seed2, + input, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// 3D fast Fourier transform. +// Converts one or more images from RGB to HSV.
// -// Computes the 3-dimensional discrete Fourier transform over the inner-most 3 -// dimensions of `input`. +// Outputs a tensor of the same shape as the `images` tensor, containing the HSV +// value of the pixels. The output is only well defined if the value in `images` +// are in `[0,1]`. // -// Arguments: -// input: A complex64 tensor. +// `output[..., 0]` contains hue, `output[..., 1]` contains saturation, and +// `output[..., 2]` contains value. All HSV values are in `[0,1]`. A hue of 0 +// corresponds to pure red, hue 1/3 is pure green, and 2/3 is pure blue. // -// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 -// dimensions of `input` are replaced with their 3D Fourier transform. +// Arguments: +// images: 1-D or higher rank. RGB data to convert. Last dimension must be size 3. // -// @compatibility(numpy) -// Equivalent to np.fft.fftn with 3 dimensions. -// @end_compatibility -func FFT3D(scope *Scope, input tf.Output) (output tf.Output) { +// Returns `images` converted to HSV. +func RGBToHSV(scope *Scope, images tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "FFT3D", + Type: "RGBToHSV", Input: []tf.Input{ - input, + images, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// CropAndResizeGradBoxesAttr is an optional argument to CropAndResizeGradBoxes. -type CropAndResizeGradBoxesAttr func(optionalAttr) +// Does nothing. Only useful as a placeholder for control edges. +// +// Returns the created operation. +func NoOp(scope *Scope) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "NoOp", + } + return scope.AddOperation(opspec) +} -// CropAndResizeGradBoxesMethod sets the optional method attribute to value. +// MergeV2CheckpointsAttr is an optional argument to MergeV2Checkpoints. +type MergeV2CheckpointsAttr func(optionalAttr) + +// MergeV2CheckpointsDeleteOldDirs sets the optional delete_old_dirs attribute to value. 
// -// value: A string specifying the interpolation method. Only 'bilinear' is -// supported for now. -// If not specified, defaults to "bilinear" -func CropAndResizeGradBoxesMethod(value string) CropAndResizeGradBoxesAttr { +// value: see above. +// If not specified, defaults to true +func MergeV2CheckpointsDeleteOldDirs(value bool) MergeV2CheckpointsAttr { return func(m optionalAttr) { - m["method"] = value + m["delete_old_dirs"] = value } } -// Computes the gradient of the crop_and_resize op wrt the input boxes tensor. +// V2 format specific: merges the metadata files of sharded checkpoints. The +// +// result is one logical checkpoint, with one physical metadata file and renamed +// data files. +// +// Intended for "grouping" multiple checkpoints in a sharded checkpoint setup. +// +// If delete_old_dirs is true, attempts to delete recursively the dirname of each +// path in the input checkpoint_prefixes. This is useful when those paths are non +// user-facing temporary locations. // // Arguments: -// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. -// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. -// Both `image_height` and `image_width` need to be positive. -// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor -// specifies the coordinates of a box in the `box_ind[i]` image and is specified -// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of -// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the -// `[0, 1]` interval of normalized image height is mapped to -// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in -// which case the sampled crop is an up-down flipped version of the original -// image. The width dimension is treated similarly. Normalized coordinates -// outside the `[0, 1]` range are allowed, in which case we use -// `extrapolation_value` to extrapolate the input image values. 
-// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. -// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. +// checkpoint_prefixes: prefixes of V2 checkpoints to merge. +// destination_prefix: scalar. The desired final prefix. Allowed to be the same +// as one of the checkpoint_prefixes. // -// Returns A 2-D tensor of shape `[num_boxes, 4]`. -func CropAndResizeGradBoxes(scope *Scope, grads tf.Output, image tf.Output, boxes tf.Output, box_ind tf.Output, optional ...CropAndResizeGradBoxesAttr) (output tf.Output) { +// Returns the created operation. +func MergeV2Checkpoints(scope *Scope, checkpoint_prefixes tf.Output, destination_prefix tf.Output, optional ...MergeV2CheckpointsAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -32505,208 +35213,182 @@ func CropAndResizeGradBoxes(scope *Scope, grads tf.Output, image tf.Output, boxe a(attrs) } opspec := tf.OpSpec{ - Type: "CropAndResizeGradBoxes", + Type: "MergeV2Checkpoints", Input: []tf.Input{ - grads, image, boxes, box_ind, + checkpoint_prefixes, destination_prefix, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Greedily selects a subset of bounding boxes in descending order of score, +// Saves input tensors slices to disk. // -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes with score less than -// `score_threshold` are removed. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. 
Note that this algorithm -// is agnostic to where the origin is in the coordinate system and more -// generally is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. For example: -// selected_indices = tf.image.non_max_suppression_v2( -// boxes, scores, max_output_size, iou_threshold, score_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) +// This is like `Save` except that tensors can be listed in the saved file as being +// a slice of a larger tensor. `shapes_and_slices` specifies the shape of the +// larger tensor and the slice that this tensor covers. `shapes_and_slices` must +// have as many elements as `tensor_names`. +// +// Elements of the `shapes_and_slices` input must either be: +// +// * The empty string, in which case the corresponding tensor is +// saved normally. +// * A string of the form `dim0 dim1 ... dimN-1 slice-spec` where the +// `dimI` are the dimensions of the larger tensor and `slice-spec` +// specifies what part is covered by the tensor to save. +// +// `slice-spec` itself is a `:`-separated list: `slice0:slice1:...:sliceN-1` +// where each `sliceI` is either: +// +// * The string `-` meaning that the slice covers all indices of this dimension +// * `start,length` where `start` and `length` are integers. In that +// case the slice covers `length` indices starting at `start`. +// +// See also `Save`. // // Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). 
-// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. -// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove -// boxes based on score. +// filename: Must have a single element. The name of the file to which we write the +// tensor. +// tensor_names: Shape `[N]`. The names of the tensors to be saved. +// shapes_and_slices: Shape `[N]`. The shapes and slice specifications to use when +// saving the tensors. +// data: `N` tensors to save. // -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`. -func NonMaxSuppressionV3(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output, score_threshold tf.Output) (selected_indices tf.Output) { +// Returns the created operation. +func SaveSlices(scope *Scope, filename tf.Output, tensor_names tf.Output, shapes_and_slices tf.Output, data []tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "NonMaxSuppressionV3", + Type: "SaveSlices", Input: []tf.Input{ - boxes, scores, max_output_size, iou_threshold, score_threshold, + filename, tensor_names, shapes_and_slices, tf.OutputList(data), }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// NonMaxSuppressionV4Attr is an optional argument to NonMaxSuppressionV4. -type NonMaxSuppressionV4Attr func(optionalAttr) +// DenseToDenseSetOperationAttr is an optional argument to DenseToDenseSetOperation. +type DenseToDenseSetOperationAttr func(optionalAttr) -// NonMaxSuppressionV4PadToMaxOutputSize sets the optional pad_to_max_output_size attribute to value. 
-// -// value: If true, the output `selected_indices` is padded to be of length -// `max_output_size`. Defaults to false. -// If not specified, defaults to false -func NonMaxSuppressionV4PadToMaxOutputSize(value bool) NonMaxSuppressionV4Attr { +// DenseToDenseSetOperationValidateIndices sets the optional validate_indices attribute to value. +// If not specified, defaults to true +func DenseToDenseSetOperationValidateIndices(value bool) DenseToDenseSetOperationAttr { return func(m optionalAttr) { - m["pad_to_max_output_size"] = value + m["validate_indices"] = value } } -// Greedily selects a subset of bounding boxes in descending order of score, +// Applies set operation along last dimension of 2 `Tensor` inputs. // -// pruning away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes with score less than -// `score_threshold` are removed. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system and more -// generally is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// The output of this operation is a set of integers indexing into the input -// collection of bounding boxes representing the selected boxes. The bounding -// box coordinates corresponding to the selected indices can then be obtained -// using the `tf.gather operation`. 
For example: -// selected_indices = tf.image.non_max_suppression_v2( -// boxes, scores, max_output_size, iou_threshold, score_threshold) -// selected_boxes = tf.gather(boxes, selected_indices) +// See SetOperationOp::SetOperationFromContext for values of `set_operation`. +// +// Output `result` is a `SparseTensor` represented by `result_indices`, +// `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`, this +// has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`. The `nth` +// dimension contains the result of `set_operation` applied to the corresponding +// `[0...n-1]` dimension of `set`. // // Arguments: -// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. -// scores: A 1-D float tensor of shape `[num_boxes]` representing a single -// score corresponding to each box (each row of boxes). -// max_output_size: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. -// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove -// boxes based on score. +// set1: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set2`. +// Dimension `n` contains values in a set, duplicates are allowed but ignored. +// set2: `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as `set1`. +// Dimension `n` contains values in a set, duplicates are allowed but ignored. // -// Returns A 1-D integer tensor of shape `[M]` representing the selected -// indices from the boxes tensor, where `M <= max_output_size`.A 0-D integer tensor representing the number of valid elements in -// `selected_indices`, with the valid elements appearing first. 
-func NonMaxSuppressionV4(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output, score_threshold tf.Output, optional ...NonMaxSuppressionV4Attr) (selected_indices tf.Output, valid_outputs tf.Output) { +// +// Returns 2D indices of a `SparseTensor`.1D values of a `SparseTensor`.1D `Tensor` shape of a `SparseTensor`. `result_shape[0...n-1]` is +// the same as the 1st `n-1` dimensions of `set1` and `set2`, `result_shape[n]` +// is the max result set size across all `0...n-1` dimensions. +func DenseToDenseSetOperation(scope *Scope, set1 tf.Output, set2 tf.Output, set_operation string, optional ...DenseToDenseSetOperationAttr) (result_indices tf.Output, result_values tf.Output, result_shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"set_operation": set_operation} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "NonMaxSuppressionV4", + Type: "DenseToDenseSetOperation", Input: []tf.Input{ - boxes, scores, max_output_size, iou_threshold, score_threshold, + set1, set2, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1), op.Output(2) } -// Removes keys and its associated values from a table. -// -// The tensor `keys` must of the same type as the keys of the table. Keys not -// already in the table are silently ignored. -// -// Arguments: -// table_handle: Handle to the table. -// keys: Any shape. Keys of the elements to remove. +// Generate a sharded filename. The filename is printf formatted as // -// Returns the created operation. -func LookupTableRemoveV2(scope *Scope, table_handle tf.Output, keys tf.Output) (o *tf.Operation) { +// %s-%05d-of-%05d, basename, shard, num_shards. 
+func ShardedFilename(scope *Scope, basename tf.Output, shard tf.Output, num_shards tf.Output) (filename tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "LookupTableRemoveV2", + Type: "ShardedFilename", Input: []tf.Input{ - table_handle, keys, + basename, shard, num_shards, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// CombinedNonMaxSuppressionAttr is an optional argument to CombinedNonMaxSuppression. -type CombinedNonMaxSuppressionAttr func(optionalAttr) +// Generate a glob pattern matching all sharded file names. +func ShardedFilespec(scope *Scope, basename tf.Output, num_shards tf.Output) (filename tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ShardedFilespec", + Input: []tf.Input{ + basename, num_shards, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} -// CombinedNonMaxSuppressionPadPerClass sets the optional pad_per_class attribute to value. +// TextLineReaderV2Attr is an optional argument to TextLineReaderV2. +type TextLineReaderV2Attr func(optionalAttr) + +// TextLineReaderV2SkipHeaderLines sets the optional skip_header_lines attribute to value. // -// value: If false, the output nmsed boxes, scores and classes -// are padded/clipped to `max_total_size`. If true, the -// output nmsed boxes, scores and classes are padded to be of length -// `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in -// which case it is clipped to `max_total_size`. Defaults to false. -// If not specified, defaults to false -func CombinedNonMaxSuppressionPadPerClass(value bool) CombinedNonMaxSuppressionAttr { +// value: Number of lines to skip from the beginning of every file. 
+// If not specified, defaults to 0 +func TextLineReaderV2SkipHeaderLines(value int64) TextLineReaderV2Attr { return func(m optionalAttr) { - m["pad_per_class"] = value + m["skip_header_lines"] = value } } -// Greedily selects a subset of bounding boxes in descending order of score, +// TextLineReaderV2Container sets the optional container attribute to value. // -// This operation performs non_max_suppression on the inputs per batch, across -// all classes. -// Prunes away boxes that have high intersection-over-union (IOU) overlap -// with previously selected boxes. Bounding boxes are supplied as -// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any -// diagonal pair of box corners and the coordinates can be provided as normalized -// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm -// is agnostic to where the origin is in the coordinate system. Also note that -// this algorithm is invariant to orthogonal transformations and translations -// of the coordinate system; thus translating or reflections of the coordinate -// system result in the same boxes being selected by the algorithm. -// The output of this operation is the final boxes, scores and classes tensor -// returned after performing non_max_suppression. +// value: If non-empty, this reader is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func TextLineReaderV2Container(value string) TextLineReaderV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// TextLineReaderV2SharedName sets the optional shared_name attribute to value. // -// Arguments: -// boxes: A 4-D float tensor of shape `[batch_size, num_boxes, q, 4]`. If `q` is 1 then -// same boxes are used for all classes otherwise, if `q` is equal to number of -// classes, class-specific boxes are used. 
-// scores: A 3-D float tensor of shape `[batch_size, num_boxes, num_classes]` -// representing a single score corresponding to each box (each row of boxes). -// max_output_size_per_class: A scalar integer tensor representing the maximum number of -// boxes to be selected by non max suppression per class -// max_total_size: A scalar representing maximum number of boxes retained over all classes. -// iou_threshold: A 0-D float tensor representing the threshold for deciding whether -// boxes overlap too much with respect to IOU. -// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove -// boxes based on score. +// value: If non-empty, this reader is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. +// If not specified, defaults to "" +func TextLineReaderV2SharedName(value string) TextLineReaderV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A Reader that outputs the lines of a file delimited by '\n'. // -// Returns A [batch_size, max_detections, 4] float32 tensor -// containing the non-max suppressed boxes.A [batch_size, max_detections] float32 tensor -// containing the scores for the boxes.A [batch_size, max_detections] float32 tensor -// containing the classes for the boxes.A [batch_size] int32 tensor indicating the number of -// valid detections per batch item. Only the top num_detections[i] entries in -// nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the -// entries are zero paddings. -func CombinedNonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size_per_class tf.Output, max_total_size tf.Output, iou_threshold tf.Output, score_threshold tf.Output, optional ...CombinedNonMaxSuppressionAttr) (nmsed_boxes tf.Output, nmsed_scores tf.Output, nmsed_classes tf.Output, valid_detections tf.Output) { +// Returns The handle to reference the Reader. 
+func TextLineReaderV2(scope *Scope, optional ...TextLineReaderV2Attr) (reader_handle tf.Output) { if scope.Err() != nil { return } @@ -32715,137 +35397,192 @@ func CombinedNonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, a(attrs) } opspec := tf.OpSpec{ - Type: "CombinedNonMaxSuppression", - Input: []tf.Input{ - boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, score_threshold, - }, + Type: "TextLineReaderV2", + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3) + return op.Output(0) } -// Computes the matrix logarithm of one or more square matrices: +// LoadAndRemapMatrixAttr is an optional argument to LoadAndRemapMatrix. +type LoadAndRemapMatrixAttr func(optionalAttr) + +// LoadAndRemapMatrixMaxRowsInMemory sets the optional max_rows_in_memory attribute to value. // +// value: The maximum number of rows to load from the checkpoint at +// once. If less than or equal to 0, the entire matrix will be loaded into +// memory. Setting this arg trades increased disk reads for lower memory usage. +// If not specified, defaults to -1 +func LoadAndRemapMatrixMaxRowsInMemory(value int64) LoadAndRemapMatrixAttr { + return func(m optionalAttr) { + m["max_rows_in_memory"] = value + } +} + +// Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint // -// \\(log(exp(A)) = A\\) +// at `ckpt_path` and potentially reorders its rows and columns using the +// specified remappings. // -// This op is only defined for complex matrices. If A is positive-definite and -// real, then casting to a complex matrix, taking the logarithm and casting back -// to a real matrix will give the correct result. +// Most users should use one of the wrapper initializers (such as +// `tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this +// function directly. // -// This function computes the matrix logarithm using the Schur-Parlett algorithm. 
-// Details of the algorithm can be found in Section 11.6.2 of: -// Nicholas J. Higham, Functions of Matrices: Theory and Computation, SIAM 2008. -// ISBN 978-0-898716-46-7. +// The remappings are 1-D tensors with the following properties: // -// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions -// form square matrices. The output is a tensor of the same shape as the input -// containing the exponential for all input submatrices `[..., :, :]`. +// * `row_remapping` must have exactly `num_rows` entries. Row `i` of the output +// matrix will be initialized from the row corresponding to index +// `row_remapping[i]` in the old `Tensor` from the checkpoint. +// * `col_remapping` must have either 0 entries (indicating that no column +// reordering is needed) or `num_cols` entries. If specified, column `j` of the +// output matrix will be initialized from the column corresponding to index +// `col_remapping[j]` in the old `Tensor` from the checkpoint. +// * A value of -1 in either of the remappings signifies a "missing" entry. In that +// case, values from the `initializing_values` tensor will be used to fill that +// missing row or column. If `row_remapping` has `r` missing entries and +// `col_remapping` has `c` missing entries, then the following condition must be +// true: // -// Arguments: -// input: Shape is `[..., M, M]`. +// `(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)` +// +// The remapping tensors can be generated using the GenerateVocabRemapping op. +// +// As an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1], +// initializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing +// the value from row i, column j of the old tensor in the checkpoint, the output +// matrix will look like the following: +// +// [[w(1, 0), w(1, 2), 0.5], +// [w(0, 0), w(0, 2), -0.5], +// [0.25, -0.25, 42]] // -// Returns Shape is `[..., M, M]`. 
+// Arguments: +// ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from +// which the old matrix `Tensor` will be loaded. +// old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. +// row_remapping: An int `Tensor` of row remappings (generally created by +// `generate_vocab_remapping`). Even if no row remapping is needed, this must +// still be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted +// index-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`). +// col_remapping: An int `Tensor` of column remappings (generally created by +// `generate_vocab_remapping`). May be a size-0 `Tensor` if only row remapping +// is to be done (e.g. column ordering is the same). +// initializing_values: A float `Tensor` containing values to fill in for cells +// in the output matrix that are not loaded from the checkpoint. Length must be +// exactly the same as the number of missing / new cells. +// num_rows: Number of rows (length of the 1st dimension) in the output matrix. +// num_cols: Number of columns (length of the 2nd dimension) in the output matrix. // -// @compatibility(scipy) -// Equivalent to scipy.linalg.logm -// @end_compatibility -func MatrixLogarithm(scope *Scope, input tf.Output) (output tf.Output) { +// Returns Output matrix containing existing values loaded from the +// checkpoint, and with any missing values filled in from initializing_values. 
+func LoadAndRemapMatrix(scope *Scope, ckpt_path tf.Output, old_tensor_name tf.Output, row_remapping tf.Output, col_remapping tf.Output, initializing_values tf.Output, num_rows int64, num_cols int64, optional ...LoadAndRemapMatrixAttr) (output_matrix tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"num_rows": num_rows, "num_cols": num_cols} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "MatrixLogarithm", + Type: "LoadAndRemapMatrix", Input: []tf.Input{ - input, + ckpt_path, old_tensor_name, row_remapping, col_remapping, initializing_values, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// This op is used as a placeholder in If branch functions. It doesn't provide a -// valid output when run, so must either be removed (e.g. replaced with a -// function input) or guaranteed not to be used (e.g. if mirroring an -// intermediate output needed for the gradient computation of the other branch). +// TFRecordReaderV2Attr is an optional argument to TFRecordReaderV2. +type TFRecordReaderV2Attr func(optionalAttr) + +// TFRecordReaderV2Container sets the optional container attribute to value. // -// Arguments: -// dtype: The type of the output. -// shape: The purported shape of the output. This is only used for shape inference; -// the output will not necessarily have this shape. Can be a partial shape. +// value: If non-empty, this reader is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func TFRecordReaderV2Container(value string) TFRecordReaderV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// TFRecordReaderV2SharedName sets the optional shared_name attribute to value. // -// Returns \"Fake\" output value. This should not be consumed by another op. 
-func FakeParam(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) { - if scope.Err() != nil { - return +// value: If non-empty, this reader is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. +// If not specified, defaults to "" +func TFRecordReaderV2SharedName(value string) TFRecordReaderV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value } - attrs := map[string]interface{}{"dtype": dtype, "shape": shape} - opspec := tf.OpSpec{ - Type: "FakeParam", +} - Attrs: attrs, +// TFRecordReaderV2CompressionType sets the optional compression_type attribute to value. +// If not specified, defaults to "" +func TFRecordReaderV2CompressionType(value string) TFRecordReaderV2Attr { + return func(m optionalAttr) { + m["compression_type"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns the next representable value of `x1` in the direction of `x2`, element-wise. -// -// This operation returns the same result as the C++ std::nextafter function. -// -// It can also return a subnormal number. +// A Reader that outputs the records from a TensorFlow Records file. // -// @compatibility(cpp) -// Equivalent to C++ std::nextafter function. -// @end_compatibility -func NextAfter(scope *Scope, x1 tf.Output, x2 tf.Output) (output tf.Output) { +// Returns The handle to reference the Reader. +func TFRecordReaderV2(scope *Scope, optional ...TFRecordReaderV2Attr) (reader_handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "NextAfter", - Input: []tf.Input{ - x1, x2, - }, + Type: "TFRecordReaderV2", + + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the gradient for the inverse of `x` wrt its input. -// -// Specifically, `grad = -dy * y*y`, where `y = 1/x`, and `dy` -// is the corresponding input gradient. 
-func InvGrad(scope *Scope, y tf.Output, dy tf.Output) (z tf.Output) { - if scope.Err() != nil { - return +// QuantizeAndDequantizeV3Attr is an optional argument to QuantizeAndDequantizeV3. +type QuantizeAndDequantizeV3Attr func(optionalAttr) + +// QuantizeAndDequantizeV3SignedInput sets the optional signed_input attribute to value. +// If not specified, defaults to true +func QuantizeAndDequantizeV3SignedInput(value bool) QuantizeAndDequantizeV3Attr { + return func(m optionalAttr) { + m["signed_input"] = value } - opspec := tf.OpSpec{ - Type: "InvGrad", - Input: []tf.Input{ - y, dy, - }, +} + +// QuantizeAndDequantizeV3RangeGiven sets the optional range_given attribute to value. +// If not specified, defaults to true +func QuantizeAndDequantizeV3RangeGiven(value bool) QuantizeAndDequantizeV3Attr { + return func(m optionalAttr) { + m["range_given"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// List of the given size with empty elements. +// Quantizes then dequantizes a tensor. // -// element_shape: the shape of the future elements of the list -// num_elements: the number of elements to reserve -// handle: the output list -// element_dtype: the desired type of elements in the list. -func TensorListReserve(scope *Scope, element_shape tf.Output, num_elements tf.Output, element_dtype tf.DataType) (handle tf.Output) { +// This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a +// tensor, so its value can change during training. 
+func QuantizeAndDequantizeV3(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, num_bits tf.Output, optional ...QuantizeAndDequantizeV3Attr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"element_dtype": element_dtype} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TensorListReserve", + Type: "QuantizeAndDequantizeV3", Input: []tf.Input{ - element_shape, num_elements, + input, input_min, input_max, num_bits, }, Attrs: attrs, } @@ -32853,73 +35590,77 @@ func TensorListReserve(scope *Scope, element_shape tf.Output, num_elements tf.Ou return op.Output(0) } -// A substitute for `InterleaveDataset` on a fixed list of `N` datasets. +// IdentityReaderV2Attr is an optional argument to IdentityReaderV2. +type IdentityReaderV2Attr func(optionalAttr) + +// IdentityReaderV2Container sets the optional container attribute to value. // -// Arguments: -// selector_input_dataset: A dataset of scalar `DT_INT64` elements that determines which of the -// `N` data inputs should produce the next output element. -// data_input_datasets: `N` datasets with the same type that will be interleaved according to -// the values of `selector_input_dataset`. +// value: If non-empty, this reader is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func IdentityReaderV2Container(value string) IdentityReaderV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// IdentityReaderV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this reader is named in the given bucket +// with this shared_name. Otherwise, the node name is used instead. 
+// If not specified, defaults to "" +func IdentityReaderV2SharedName(value string) IdentityReaderV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A Reader that outputs the queued work as both the key and value. // +// To use, enqueue strings in a Queue. ReaderRead will take the front +// work string and output (work, work). // -func ExperimentalDirectedInterleaveDataset(scope *Scope, selector_input_dataset tf.Output, data_input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns The handle to reference the Reader. +func IdentityReaderV2(scope *Scope, optional ...IdentityReaderV2Attr) (reader_handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ExperimentalDirectedInterleaveDataset", - Input: []tf.Input{ - selector_input_dataset, tf.OutputList(data_input_datasets), - }, + Type: "IdentityReaderV2", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// RandomUniformIntAttr is an optional argument to RandomUniformInt. -type RandomUniformIntAttr func(optionalAttr) - -// RandomUniformIntSeed sets the optional seed attribute to value. -// -// value: If either `seed` or `seed2` are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func RandomUniformIntSeed(value int64) RandomUniformIntAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} +// ResourceApplyGradientDescentAttr is an optional argument to ResourceApplyGradientDescent. +type ResourceApplyGradientDescentAttr func(optionalAttr) -// RandomUniformIntSeed2 sets the optional seed2 attribute to value. 
+// ResourceApplyGradientDescentUseLocking sets the optional use_locking attribute to value. // -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomUniformIntSeed2(value int64) RandomUniformIntAttr { +// value: If `True`, the subtraction will be protected by a lock; +// otherwise the behavior is undefined, but may exhibit less contention. +// If not specified, defaults to false +func ResourceApplyGradientDescentUseLocking(value bool) ResourceApplyGradientDescentAttr { return func(m optionalAttr) { - m["seed2"] = value + m["use_locking"] = value } } -// Outputs random integers from a uniform distribution. -// -// The generated values are uniform integers in the range `[minval, maxval)`. -// The lower bound `minval` is included in the range, while the upper bound -// `maxval` is excluded. -// -// The random integers are slightly biased unless `maxval - minval` is an exact -// power of two. The bias is small for values of `maxval - minval` significantly -// smaller than the range of the output (either `2^32` or `2^64`). +// Update '*var' by subtracting 'alpha' * 'delta' from it. // // Arguments: -// shape: The shape of the output tensor. -// minval: 0-D. Inclusive lower bound on the generated integers. -// maxval: 0-D. Exclusive upper bound on the generated integers. +// var_: Should be from a Variable(). +// alpha: Scaling factor. Must be a scalar. +// delta: The change. // -// Returns A tensor of the specified shape filled with uniform random integers. -func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { +// Returns the created operation. 
+func ResourceApplyGradientDescent(scope *Scope, var_ tf.Output, alpha tf.Output, delta tf.Output, optional ...ResourceApplyGradientDescentAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -32928,275 +35669,327 @@ func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf a(attrs) } opspec := tf.OpSpec{ - Type: "RandomUniformInt", + Type: "ResourceApplyGradientDescent", Input: []tf.Input{ - shape, minval, maxval, + var_, alpha, delta, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Add the quantile summaries to each quantile stream resource. +// Returns the next record (key, value pair) produced by a Reader. // -// An op that adds a list of quantile summaries to a quantile stream resource. Each -// summary Tensor is rank 2, containing summaries (value, weight, min_rank, max_rank) -// for a single feature. +// Will dequeue from the input queue if necessary (e.g. when the +// Reader needs to start reading from a new file since it has finished +// with the previous file). // // Arguments: -// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. -// summaries: string; List of Rank 2 Tensor each containing the summaries for a single feature. +// reader_handle: Handle to a Reader. +// queue_handle: Handle to a Queue, with string work items. // -// Returns the created operation. -func BoostedTreesQuantileStreamResourceAddSummaries(scope *Scope, quantile_stream_resource_handle tf.Output, summaries []tf.Output) (o *tf.Operation) { +// Returns A scalar.A scalar. 
+func ReaderReadV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output) (key tf.Output, value tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "BoostedTreesQuantileStreamResourceAddSummaries", + Type: "ReaderReadV2", Input: []tf.Input{ - quantile_stream_resource_handle, tf.OutputList(summaries), + reader_handle, queue_handle, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } -// Creates a Dataset that returns pseudorandom numbers. +// Returns up to `num_records` (key, value) pairs produced by a Reader. // -// Arguments: -// seed: A scalar seed for the random number generator. If either seed or -// seed2 is set to be non-zero, the random number generator is seeded -// by the given seed. Otherwise, a random seed is used. -// seed2: A second scalar seed to avoid seed collision. +// Will dequeue from the input queue if necessary (e.g. when the +// Reader needs to start reading from a new file since it has finished +// with the previous file). +// It may return less than `num_records` even before the last batch. // +// Arguments: +// reader_handle: Handle to a `Reader`. +// queue_handle: Handle to a `Queue`, with string work items. +// num_records: number of records to read from `Reader`. // -func ExperimentalRandomDataset(scope *Scope, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns A 1-D tensor.A 1-D tensor. 
+func ReaderReadUpToV2(scope *Scope, reader_handle tf.Output, queue_handle tf.Output, num_records tf.Output) (keys tf.Output, values tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ExperimentalRandomDataset", + Type: "ReaderReadUpToV2", Input: []tf.Input{ - seed, seed2, + reader_handle, queue_handle, num_records, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// A dataset that splits the elements of its input into multiple elements. -func ExperimentalUnbatchDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Adds v into specified rows of x. +// +// Computes y = x; y[i, :] += v; return y. +// +// Arguments: +// x: A `Tensor` of type T. +// i: A vector. Indices into the left-most dimension of `x`. +// v: A `Tensor` of type T. Same dimension sizes as x except the first dimension, which must be the same as i's size. +// +// Returns A `Tensor` of type T. An alias of `x`. The content of `y` is undefined if there are duplicates in `i`. +func InplaceAdd(scope *Scope, x tf.Output, i tf.Output, v tf.Output) (y tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ExperimentalUnbatchDataset", + Type: "InplaceAdd", Input: []tf.Input{ - input_dataset, + x, i, v, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that overrides the maximum intra-op parallelism. +// Restore a Reader to its initial clean state. // // Arguments: +// reader_handle: Handle to a Reader. // -// max_intra_op_parallelism: Identifies the maximum intra-op parallelism to use. 
-// -// -func ExperimentalMaxIntraOpParallelismDataset(scope *Scope, input_dataset tf.Output, max_intra_op_parallelism tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns the created operation. +func ReaderResetV2(scope *Scope, reader_handle tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ExperimentalMaxIntraOpParallelismDataset", + Type: "ReaderResetV2", Input: []tf.Input{ - input_dataset, max_intra_op_parallelism, + reader_handle, }, - Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// StringSplitV2Attr is an optional argument to StringSplitV2. -type StringSplitV2Attr func(optionalAttr) +// BatchAttr is an optional argument to Batch. +type BatchAttr func(optionalAttr) -// StringSplitV2Maxsplit sets the optional maxsplit attribute to value. -// -// value: An `int`. If `maxsplit > 0`, limit of the split of the result. -// If not specified, defaults to -1 -func StringSplitV2Maxsplit(value int64) StringSplitV2Attr { +// BatchMaxEnqueuedBatches sets the optional max_enqueued_batches attribute to value. +// If not specified, defaults to 10 +func BatchMaxEnqueuedBatches(value int64) BatchAttr { return func(m optionalAttr) { - m["maxsplit"] = value + m["max_enqueued_batches"] = value } } -// Split elements of `source` based on `sep` into a `SparseTensor`. +// BatchAllowedBatchSizes sets the optional allowed_batch_sizes attribute to value. +// If not specified, defaults to <> +func BatchAllowedBatchSizes(value []int64) BatchAttr { + return func(m optionalAttr) { + m["allowed_batch_sizes"] = value + } +} + +// BatchContainer sets the optional container attribute to value. 
+// If not specified, defaults to "" +func BatchContainer(value string) BatchAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// BatchSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func BatchSharedName(value string) BatchAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// BatchBatchingQueue sets the optional batching_queue attribute to value. +// If not specified, defaults to "" +func BatchBatchingQueue(value string) BatchAttr { + return func(m optionalAttr) { + m["batching_queue"] = value + } +} + +// Batches all input tensors nondeterministically. // -// Let N be the size of source (typically N will be the batch size). Split each -// element of `source` based on `sep` and return a `SparseTensor` -// containing the split tokens. Empty tokens are ignored. +// When many instances of this Op are being run concurrently with the same +// container/shared_name in the same device, some will output zero-shaped Tensors +// and others will output Tensors of size up to max_batch_size. // -// For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', -// then the output will be -// ``` -// st.indices = [0, 0; -// 0, 1; -// 1, 0; -// 1, 1; -// 1, 2] -// st.shape = [2, 3] -// st.values = ['hello', 'world', 'a', 'b', 'c'] -// ``` +// All Tensors in in_tensors are batched together (so, for example, labels and +// features should be batched with a single instance of this operation). // -// If `sep` is given, consecutive delimiters are not grouped together and are -// deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and -// sep of `"<>"` returns `["1", "2", "", "3"]`. If `sep` is None or an empty -// string, consecutive whitespace are regarded as a single separator, and the -// result will contain no empty strings at the startor end if the string has -// leading or trailing whitespace.
+// Each invocation of batch emits an `id` scalar which will be used to identify +// this particular invocation when doing unbatch or its gradient. // -// Note that the above mentioned behavior matches python's str.split. +// Each op which emits a non-empty batch will also emit a non-empty batch_index +// Tensor, which is a [K, 3] matrix where each row contains the invocation's id, +// start, and length of elements of each set of Tensors present in batched_tensors. // -// Arguments: -// input: `1-D` string `Tensor`, the strings to split. -// sep: `0-D` string `Tensor`, the delimiter character. -func StringSplitV2(scope *Scope, input tf.Output, sep tf.Output, optional ...StringSplitV2Attr) (indices tf.Output, values tf.Output, shape tf.Output) { +// Batched tensors are concatenated along the first dimension, and all tensors in +// in_tensors must have the first dimension of the same size. +// +// in_tensors: The tensors to be batched. +// num_batch_threads: Number of scheduling threads for processing batches of work. +// Determines the number of batches processed in parallel. +// max_batch_size: Batch sizes will never be bigger than this. +// batch_timeout_micros: Maximum number of microseconds to wait before outputting +// an incomplete batch. +// allowed_batch_sizes: Optional list of allowed batch sizes. If left empty, does +// nothing. Otherwise, supplies a list of batch sizes, causing the op to pad +// batches up to one of those sizes. The entries must increase monotonically, and +// the final entry must equal max_batch_size. +// grad_timeout_micros: The timeout to use for the gradient. See Unbatch. +// batched_tensors: Either empty tensors or a batch of concatenated Tensors. +// batch_index: If batched_tensors is non-empty, has information to invert it. +// container: Controls the scope of sharing of this batch. +// id: always contains a scalar with a unique ID for this invocation of Batch.
+// shared_name: Concurrently running instances of batch in the same device with the +// same container and shared_name will batch their elements together. If left +// empty, the op name will be used as the shared name. +// T: the types of tensors to be batched. +func Batch(scope *Scope, in_tensors []tf.Output, num_batch_threads int64, max_batch_size int64, batch_timeout_micros int64, grad_timeout_micros int64, optional ...BatchAttr) (batched_tensors []tf.Output, batch_index tf.Output, id tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_batch_threads": num_batch_threads, "max_batch_size": max_batch_size, "batch_timeout_micros": batch_timeout_micros, "grad_timeout_micros": grad_timeout_micros} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "StringSplitV2", + Type: "Batch", Input: []tf.Input{ - input, sep, + tf.OutputList(in_tensors), }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + if scope.Err() != nil { + return + } + var idx int + var err error + if batched_tensors, idx, err = makeOutputList(op, idx, "batched_tensors"); err != nil { + scope.UpdateErr("Batch", err) + return + } + batch_index = op.Output(idx) + id = op.Output(idx + 1) + return batched_tensors, batch_index, id } -// Creates a dataset that uses a custom thread pool to compute `input_dataset`. +// Adjust the hue of one or more images. // -// Arguments: +// `images` is a tensor of at least 3 dimensions. The last dimension is +// interpreted as channels, and must be three. // -// thread_pool: A resource produced by the ThreadPoolHandle op. +// The input image is considered in the RGB colorspace. Conceptually, the RGB +// colors are first mapped into HSV. A delta is then applied to all the hue values, +// and then remapped back to RGB colorspace. // +// Arguments: +// images: Images to adjust. At least 3-D. +// delta: A float delta to add to the hue.
// -func ExperimentalThreadPoolDataset(scope *Scope, input_dataset tf.Output, thread_pool tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns The hue-adjusted image or images. +func AdjustHue(scope *Scope, images tf.Output, delta tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ExperimentalThreadPoolDataset", + Type: "AdjustHue", Input: []tf.Input{ - input_dataset, thread_pool, + images, delta, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes softsign: `features / (abs(features) + 1)`. -func Softsign(scope *Scope, features tf.Output) (activations tf.Output) { +// ResizeBicubicGradAttr is an optional argument to ResizeBicubicGrad. +type ResizeBicubicGradAttr func(optionalAttr) + +// ResizeBicubicGradAlignCorners sets the optional align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and grad tensors are +// aligned. Defaults to false. +// If not specified, defaults to false +func ResizeBicubicGradAlignCorners(value bool) ResizeBicubicGradAttr { + return func(m optionalAttr) { + m["align_corners"] = value + } +} + +// Computes the gradient of bicubic interpolation. +// +// Arguments: +// grads: 4-D with shape `[batch, height, width, channels]`. +// original_image: 4-D with shape `[batch, orig_height, orig_width, channels]`, +// The image tensor that was resized. +// +// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. +// Gradients with respect to the input image. Input image must have been +// float or double. 
+func ResizeBicubicGrad(scope *Scope, grads tf.Output, original_image tf.Output, optional ...ResizeBicubicGradAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Softsign", + Type: "ResizeBicubicGrad", Input: []tf.Input{ - features, + grads, original_image, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// EncodeProtoAttr is an optional argument to EncodeProto. -type EncodeProtoAttr func(optionalAttr) +// ResizeNearestNeighborAttr is an optional argument to ResizeNearestNeighbor. +type ResizeNearestNeighborAttr func(optionalAttr) -// EncodeProtoDescriptorSource sets the optional descriptor_source attribute to value. -// If not specified, defaults to "local://" -func EncodeProtoDescriptorSource(value string) EncodeProtoAttr { +// ResizeNearestNeighborAlignCorners sets the optional align_corners attribute to value. +// +// value: If true, the centers of the 4 corner pixels of the input and output tensors are +// aligned, preserving the values at the corner pixels. Defaults to false. +// If not specified, defaults to false +func ResizeNearestNeighborAlignCorners(value bool) ResizeNearestNeighborAttr { return func(m optionalAttr) { - m["descriptor_source"] = value + m["align_corners"] = value } } -// The op serializes protobuf messages provided in the input tensors. -// -// The types of the tensors in `values` must match the schema for the -// fields specified in `field_names`. All the tensors in `values` must -// have a common shape prefix, *batch_shape*. -// -// The `sizes` tensor specifies repeat counts for each field. The repeat -// count (last dimension) of a each tensor in `values` must be greater -// than or equal to corresponding repeat count in `sizes`. -// -// A `message_type` name must be provided to give context for the field -// names. 
The actual message descriptor can be looked up either in the -// linked-in descriptor pool or a filename provided by the caller using -// the `descriptor_source` attribute. -// -// The `descriptor_source` attribute selects a source of protocol -// descriptors to consult when looking up `message_type`. This may be a -// filename containing a serialized `FileDescriptorSet` message, -// or the special value `local://`, in which case only descriptors linked -// into the code will be searched; the filename can be on any filesystem -// accessible to TensorFlow. -// -// You can build a `descriptor_source` file using the `--descriptor_set_out` -// and `--include_imports` options to the protocol compiler `protoc`. -// -// The `local://` database only covers descriptors linked into the -// code via C++ libraries, not Python imports. You can link in a proto descriptor -// by creating a cc_library target with alwayslink=1. -// -// There are a few special cases in the value mapping: -// -// Submessage and group fields must be pre-serialized as TensorFlow strings. -// -// TensorFlow lacks support for unsigned int64s, so they must be -// represented as `tf.int64` with the same twos-complement bit pattern -// (the obvious way). -// -// Unsigned int32 values can be represented exactly with `tf.int64`, or -// with sign wrapping if the input is of type `tf.int32`. +// Resize `images` to `size` using nearest neighbor interpolation. // // Arguments: -// sizes: Tensor of int32 with shape `[batch_shape, len(field_names)]`. -// values: List of tensors containing values for the corresponding field. -// field_names: List of strings containing proto field names. -// message_type: Name of the proto message type to decode. +// images: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `new_height, new_width`. The +// new size for the images. // -// Returns Tensor of serialized protos with shape `batch_shape`. 
-func EncodeProto(scope *Scope, sizes tf.Output, values []tf.Output, field_names []string, message_type string, optional ...EncodeProtoAttr) (bytes tf.Output) { +// Returns 4-D with shape +// `[batch, new_height, new_width, channels]`. +func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optional ...ResizeNearestNeighborAttr) (resized_images tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"field_names": field_names, "message_type": message_type} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "EncodeProto", + Type: "ResizeNearestNeighbor", Input: []tf.Input{ - sizes, tf.OutputList(values), + images, size, }, Attrs: attrs, } @@ -33204,163 +35997,144 @@ func EncodeProto(scope *Scope, sizes tf.Output, values []tf.Output, field_names return op.Output(0) } -// Creates a TensorArray for storing the gradients of values in the given handle. -// -// If the given TensorArray gradient already exists, returns a reference to it. -// -// Locks the size of the original TensorArray by disabling its dynamic size flag. -// -// **A note about the input flow_in:** -// -// The handle flow_in forces the execution of the gradient lookup to occur -// only after certain other operations have occurred. For example, when -// the forward TensorArray is dynamically sized, writes to this TensorArray -// may resize the object. The gradient TensorArray is statically sized based -// on the size of the forward TensorArray when this operation executes. -// Furthermore, the size of the forward TensorArray is frozen by this call. -// As a result, the flow is used to ensure that the call to generate the gradient -// TensorArray only happens after all writes are executed. -// -// In the case of dynamically sized TensorArrays, gradient computation should -// only be performed on read operations that have themselves been chained via -// flow to occur only after all writes have executed. 
That way the final size -// of the forward TensorArray is known when this operation is called. -// -// **A note about the source attribute:** -// -// TensorArray gradient calls use an accumulator TensorArray object. If -// multiple gradients are calculated and run in the same session, the multiple -// gradient nodes may accidentally flow through the same accumulator TensorArray. -// This double counts and generally breaks the TensorArray gradient flow. -// -// The solution is to identify which gradient call this particular -// TensorArray gradient is being called in. This is performed by identifying -// a unique string (e.g. "gradients", "gradients_1", ...) from the input -// gradient Tensor's name. This string is used as a suffix when creating -// the TensorArray gradient object here (the attribute `source`). +// ResizeNearestNeighborGradAttr is an optional argument to ResizeNearestNeighborGrad. +type ResizeNearestNeighborGradAttr func(optionalAttr) + +// ResizeNearestNeighborGradAlignCorners sets the optional align_corners attribute to value. // -// The attribute `source` is added as a suffix to the forward TensorArray's -// name when performing the creation / lookup, so that each separate gradient -// calculation gets its own TensorArray accumulator. +// value: If true, the centers of the 4 corner pixels of the input and grad tensors are +// aligned. Defaults to false. +// If not specified, defaults to false +func ResizeNearestNeighborGradAlignCorners(value bool) ResizeNearestNeighborGradAttr { + return func(m optionalAttr) { + m["align_corners"] = value + } +} + +// Computes the gradient of nearest neighbor interpolation. // // Arguments: -// handle: The handle to the forward TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// source: The gradient source string, used to decide which gradient TensorArray -// to return. 
-func TensorArrayGradV3(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) { +// grads: 4-D with shape `[batch, height, width, channels]`. +// size: = A 1-D int32 Tensor of 2 elements: `orig_height, orig_width`. The +// original input size. +// +// Returns 4-D with shape `[batch, orig_height, orig_width, channels]`. Gradients +// with respect to the input image. +func ResizeNearestNeighborGrad(scope *Scope, grads tf.Output, size tf.Output, optional ...ResizeNearestNeighborGradAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"source": source} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "TensorArrayGradV3", + Type: "ResizeNearestNeighborGrad", Input: []tf.Input{ - handle, flow_in, + grads, size, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Creates a dataset that splits a SparseTensor into elements row-wise. -func SparseTensorSliceDataset(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseTensorSliceDataset", - Input: []tf.Input{ - indices, values, dense_shape, - }, +// ExtractJpegShapeAttr is an optional argument to ExtractJpegShape. +type ExtractJpegShapeAttr func(optionalAttr) + +// ExtractJpegShapeOutputType sets the optional output_type attribute to value. +// +// value: (Optional) The output type of the operation (int32 or int64). +// Defaults to int32. +// If not specified, defaults to DT_INT32 +func ExtractJpegShapeOutputType(value tf.DataType) ExtractJpegShapeAttr { + return func(m optionalAttr) { + m["output_type"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Returns x / y element-wise for real types. +// Extract the shape information of a JPEG-encoded image. 
// -// If `x` and `y` are reals, this will return the floating-point division. +// This op only parses the image header, so it is much faster than DecodeJpeg. // -// *NOTE*: `Div` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func RealDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// Arguments: +// contents: 0-D. The JPEG-encoded image. +// +// Returns 1-D. The image shape with format [height, width, channels]. +func ExtractJpegShape(scope *Scope, contents tf.Output, optional ...ExtractJpegShapeAttr) (image_shape tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "RealDiv", + Type: "ExtractJpegShape", Input: []tf.Input{ - x, y, + contents, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that concatenates `input_dataset` with `another_dataset`. -func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return +// DecodePngAttr is an optional argument to DecodePng. +type DecodePngAttr func(optionalAttr) + +// DecodePngChannels sets the optional channels attribute to value. +// +// value: Number of color channels for the decoded image. +// If not specified, defaults to 0 +func DecodePngChannels(value int64) DecodePngAttr { + return func(m optionalAttr) { + m["channels"] = value } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ConcatenateDataset", - Input: []tf.Input{ - input_dataset, another_dataset, - }, - Attrs: attrs, +} + +// DecodePngDtype sets the optional dtype attribute to value. 
+// If not specified, defaults to DT_UINT8 +func DecodePngDtype(value tf.DataType) DecodePngAttr { + return func(m optionalAttr) { + m["dtype"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Computes the grayscale dilation of 4-D `input` and 3-D `filter` tensors. +// Decode a PNG-encoded image to a uint8 or uint16 tensor. // -// The `input` tensor has shape `[batch, in_height, in_width, depth]` and the -// `filter` tensor has shape `[filter_height, filter_width, depth]`, i.e., each -// input channel is processed independently of the others with its own structuring -// function. The `output` tensor has shape -// `[batch, out_height, out_width, depth]`. The spatial dimensions of the output -// tensor depend on the `padding` algorithm. We currently only support the default -// "NHWC" `data_format`. +// The attr `channels` indicates the desired number of color channels for the +// decoded image. // -// In detail, the grayscale morphological 2-D dilation is the max-sum correlation -// (for consistency with `conv2d`, we use unmirrored filters): +// Accepted values are: // -// output[b, y, x, c] = -// max_{dy, dx} input[b, -// strides[1] * y + rates[1] * dy, -// strides[2] * x + rates[2] * dx, -// c] + -// filter[dy, dx, c] +// * 0: Use the number of channels in the PNG-encoded image. +// * 1: output a grayscale image. +// * 3: output an RGB image. +// * 4: output an RGBA image. // -// Max-pooling is a special case when the filter has size equal to the pooling -// kernel size and contains all zeros. +// If needed, the PNG-encoded image is transformed to match the requested number +// of color channels. // -// Note on duality: The dilation of `input` by the `filter` is equal to the -// negation of the erosion of `-input` by the reflected `filter`. +// This op also supports decoding JPEGs and non-animated GIFs since the interface +// is the same, though it is cleaner to use `tf.image.decode_image`. 
// // Arguments: -// input: 4-D with shape `[batch, in_height, in_width, depth]`. -// filter: 3-D with shape `[filter_height, filter_width, depth]`. -// strides: The stride of the sliding window for each dimension of the input -// tensor. Must be: `[1, stride_height, stride_width, 1]`. -// rates: The input stride for atrous morphological dilation. Must be: -// `[1, rate_height, rate_width, 1]`. -// padding: The type of padding algorithm to use. +// contents: 0-D. The PNG-encoded image. // -// Returns 4-D with shape `[batch, out_height, out_width, depth]`. -func Dilation2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, rates []int64, padding string) (output tf.Output) { +// Returns 3-D with shape `[height, width, channels]`. +func DecodePng(scope *Scope, contents tf.Output, optional ...DecodePngAttr) (image tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "Dilation2D", + Type: "DecodePng", Input: []tf.Input{ - input, filter, + contents, }, Attrs: attrs, } @@ -33368,624 +36142,599 @@ func Dilation2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64 return op.Output(0) } -// Converts the given variant tensor to an iterator and stores it in the given resource. +// Decode the first frame of a GIF-encoded image to a uint8 tensor. +// +// GIF with frame or transparency compression are not supported +// convert animated GIF from compressed to uncompressed by: +// +// convert $src.gif -coalesce $dst.gif +// +// This op also supports decoding JPEGs and PNGs, though it is cleaner to use +// `tf.image.decode_image`. // // Arguments: -// resource_handle: A handle to an iterator resource. -// serialized: A variant tensor storing the state of the iterator contained in the -// resource. +// contents: 0-D. The GIF-encoded image. 
// -// Returns the created operation. -func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf.Output) (o *tf.Operation) { +// Returns 4-D with shape `[num_frames, height, width, 3]`. RGB order +func DecodeGif(scope *Scope, contents tf.Output) (image tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "DeserializeIterator", + Type: "DecodeGif", Input: []tf.Input{ - resource_handle, serialized, + contents, }, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// ResourceScatterNdSubAttr is an optional argument to ResourceScatterNdSub. -type ResourceScatterNdSubAttr func(optionalAttr) +// LearnedUnigramCandidateSamplerAttr is an optional argument to LearnedUnigramCandidateSampler. +type LearnedUnigramCandidateSamplerAttr func(optionalAttr) -// ResourceScatterNdSubUseLocking sets the optional use_locking attribute to value. +// LearnedUnigramCandidateSamplerSeed sets the optional seed attribute to value. // -// value: An optional bool. Defaults to True. If True, the assignment will -// be protected by a lock; otherwise the behavior is undefined, -// but may exhibit less contention. -// If not specified, defaults to true -func ResourceScatterNdSubUseLocking(value bool) ResourceScatterNdSubAttr { +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func LearnedUnigramCandidateSamplerSeed(value int64) LearnedUnigramCandidateSamplerAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["seed"] = value } } -// Applies sparse subtraction to individual values or slices in a Variable. -// -// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `ref`. -// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. 
-// -// The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th -// dimension of `ref`. -// -// `updates` is `Tensor` of rank `Q-1+P-K` with shape: -// -// ``` -// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]] -// ``` -// -// For example, say we want to subtract 4 scattered elements from a rank-1 tensor -// with 8 elements. In Python, that subtraction would look like this: +// LearnedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. // -// ```python -// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True) -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// sub = tf.scatter_nd_sub(ref, indices, updates) -// with tf.Session() as sess: -// print sess.run(sub) -// ``` +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func LearnedUnigramCandidateSamplerSeed2(value int64) LearnedUnigramCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Generates labels for candidate sampling with a learned unigram distribution. // -// The resulting update to ref would look like this: +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. // -// [1, -9, 3, -6, -4, 6, 7, -4] +// For each batch, this op picks a single set of sampled candidate labels. // -// See `tf.scatter_nd` for more details about how to make updates to -// slices. +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. // // Arguments: -// ref: A resource handle. Must be from a VarHandleOp. -// indices: A Tensor. Must be one of the following types: int32, int64. -// A tensor of indices into ref. -// updates: A Tensor. 
Must have the same type as ref. A tensor of -// values to add to ref. +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to randomly sample. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. +// range_max: The sampler will sample integers from the interval [0, range_max). // -// Returns the created operation. -func ResourceScatterNdSub(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdSubAttr) (o *tf.Operation) { +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. 
+func LearnedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LearnedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceScatterNdSub", + Type: "LearnedUnigramCandidateSampler", Input: []tf.Input{ - ref, indices, updates, + true_classes, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) } -// Creates a dataset that batches and pads `batch_size` elements from the input. -// -// Arguments: +// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2. +type RandomShuffleQueueV2Attr func(optionalAttr) + +// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value. // -// batch_size: A scalar representing the number of elements to accumulate in a -// batch. -// padded_shapes: A list of int64 tensors representing the desired padded shapes -// of the corresponding output components. These shapes may be partially -// specified, using `-1` to indicate that a particular dimension should be -// padded to the maximum size of all batch elements. -// padding_values: A list of scalars containing the padding value to use for -// each of the outputs. +// value: The shape of each component in a value. The length of this attr must +// be either 0 or the same as the length of component_types. If the length of +// this attr is 0, the shapes of queue elements are not constrained, and +// only one element may be dequeued at a time. 
+// If not specified, defaults to <> // -func PaddedBatchDataset(scope *Scope, input_dataset tf.Output, batch_size tf.Output, padded_shapes []tf.Output, padding_values []tf.Output, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "PaddedBatchDataset", - Input: []tf.Input{ - input_dataset, batch_size, tf.OutputList(padded_shapes), tf.OutputList(padding_values), - }, - Attrs: attrs, +// REQUIRES: len(value) >= 0 +func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["shapes"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Creates a dataset that shuffles and repeats elements from `input_dataset` +// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value. // -// pseudorandomly. +// value: The upper bound on the number of elements in this queue. +// Negative numbers mean no limit. +// If not specified, defaults to -1 +func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value. // -// Arguments: +// value: Dequeue will block unless there would be this +// many elements after the dequeue or the queue is closed. This +// ensures a minimum level of mixing of elements. +// If not specified, defaults to 0 +func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["min_after_dequeue"] = value + } +} + +// RandomShuffleQueueV2Seed sets the optional seed attribute to value. // -// buffer_size: The number of output elements to buffer in an iterator over -// this dataset. Compare with the `min_after_dequeue` attr when creating a -// `RandomShuffleQueue`. -// seed: A scalar seed for the random number generator. 
If either `seed` or -// `seed2` is set to be non-zero, the random number generator is seeded -// by the given seed. Otherwise, a random seed is used. -// seed2: A second scalar seed to avoid seed collision. -// count: A scalar representing the number of times the underlying dataset -// should be repeated. The default is `-1`, which results in infinite repetition. +// value: If either seed or seed2 is set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, a random seed is used. +// If not specified, defaults to 0 +func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value. // +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// RandomShuffleQueueV2Container sets the optional container attribute to value. // -func ShuffleAndRepeatDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return +// value: If non-empty, this queue is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["container"] = value } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "ShuffleAndRepeatDataset", - Input: []tf.Input{ - input_dataset, buffer_size, seed, seed2, count, - }, - Attrs: attrs, +} + +// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value. 
+// +// value: If non-empty, this queue will be shared under the given name +// across multiple sessions. +// If not specified, defaults to "" +func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Creates a dataset that caches elements from `input_dataset`. -// -// A CacheDataset will iterate over the input_dataset, and store tensors. If the -// cache already exists, the cache will be used. If the cache is inappropriate -// (e.g. cannot be opened, contains tensors of the wrong shape / size), an error -// will the returned when used. +// A queue that randomizes the order of elements. // // Arguments: +// component_types: The type of each component in a value. // -// filename: A path on the filesystem where we should cache the dataset. Note: this -// will be a directory. -// -// -func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns The handle to the queue. +func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"component_types": component_types} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "CacheDataset", - Input: []tf.Input{ - input_dataset, filename, - }, + Type: "RandomShuffleQueueV2", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Creates a dataset that emits the records from one or more binary files. +// SerializeSparseAttr is an optional argument to SerializeSparse. +type SerializeSparseAttr func(optionalAttr) + +// SerializeSparseOutType sets the optional out_type attribute to value. 
// -// Arguments: -// filenames: A scalar or a vector containing the name(s) of the file(s) to be -// read. -// header_bytes: A scalar representing the number of bytes to skip at the -// beginning of a file. -// record_bytes: A scalar representing the number of bytes in each record. -// footer_bytes: A scalar representing the number of bytes to skip at the end -// of a file. -// buffer_size: A scalar representing the number of bytes to buffer. Must be > 0. -func FixedLengthRecordDataset(scope *Scope, filenames tf.Output, header_bytes tf.Output, record_bytes tf.Output, footer_bytes tf.Output, buffer_size tf.Output) (handle tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "FixedLengthRecordDataset", - Input: []tf.Input{ - filenames, header_bytes, record_bytes, footer_bytes, buffer_size, - }, +// value: The `dtype` to use for serialization; the supported types are `string` +// (default) and `variant`. +// If not specified, defaults to DT_STRING +func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr { + return func(m optionalAttr) { + m["out_type"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Gradients for batch normalization. -// -// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() -// -// This op is deprecated. See `tf.nn.batch_normalization`. +// Serialize a `SparseTensor` into a `[3]` `Tensor` object. // // Arguments: -// t: A 4D input Tensor. -// m: A 1D mean Tensor with size matching the last dimension of t. -// This is the first output from tf.nn.moments, -// or a saved moving average thereof. -// v: A 1D variance Tensor with size matching the last dimension of t. -// This is the second output from tf.nn.moments, -// or a saved moving average thereof. -// gamma: A 1D gamma Tensor with size matching the last dimension of t. -// If "scale_after_normalization" is true, this Tensor will be multiplied -// with the normalized Tensor. -// backprop: 4D backprop Tensor. 
-// variance_epsilon: A small float number to avoid dividing by 0. -// scale_after_normalization: A bool indicating whether the resulted tensor -// needs to be multiplied with gamma. -// -// Returns 4D backprop tensor for input.1D backprop tensor for mean.1D backprop tensor for variance.1D backprop tensor for beta.1D backprop tensor for gamma. -func BatchNormWithGlobalNormalizationGrad(scope *Scope, t tf.Output, m tf.Output, v tf.Output, gamma tf.Output, backprop tf.Output, variance_epsilon float32, scale_after_normalization bool) (dx tf.Output, dm tf.Output, dv tf.Output, db tf.Output, dg tf.Output) { +// sparse_indices: 2-D. The `indices` of the `SparseTensor`. +// sparse_values: 1-D. The `values` of the `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the `SparseTensor`. +func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BatchNormWithGlobalNormalizationGrad", + Type: "SerializeSparse", Input: []tf.Input{ - t, m, v, gamma, backprop, + sparse_indices, sparse_values, sparse_shape, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) + return op.Output(0) } -// Creates a dataset that emits the records from one or more TFRecord files. +// Draw bounding boxes on a batch of images. +// +// Outputs a copy of `images` but draws on top of the pixels zero or more bounding +// boxes specified by the locations in `boxes`. The coordinates of the each +// bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. 
The +// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and +// height of the underlying image. +// +// For example, if an image is 100 x 200 pixels (height x width) and the bounding +// box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of +// the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). +// +// Parts of the bounding box may fall outside the image. // // Arguments: -// filenames: A scalar or vector containing the name(s) of the file(s) to be -// read. -// compression_type: A scalar containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". -// buffer_size: A scalar representing the number of bytes to buffer. A value of -// 0 means no buffering will be performed. -func TFRecordDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { +// images: 4-D with shape `[batch, height, width, depth]`. A batch of images. +// boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding +// boxes. +// +// Returns 4-D with the same shape as `images`. The batch of input images with +// bounding boxes drawn on the images. +func DrawBoundingBoxes(scope *Scope, images tf.Output, boxes tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TFRecordDataset", + Type: "DrawBoundingBoxes", Input: []tf.Input{ - filenames, compression_type, buffer_size, + images, boxes, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ExperimentalStatsAggregatorHandleAttr is an optional argument to ExperimentalStatsAggregatorHandle. -type ExperimentalStatsAggregatorHandleAttr func(optionalAttr) +// SampleDistortedBoundingBoxV2Attr is an optional argument to SampleDistortedBoundingBoxV2. +type SampleDistortedBoundingBoxV2Attr func(optionalAttr) -// ExperimentalStatsAggregatorHandleContainer sets the optional container attribute to value. 
-// If not specified, defaults to "" -func ExperimentalStatsAggregatorHandleContainer(value string) ExperimentalStatsAggregatorHandleAttr { +// SampleDistortedBoundingBoxV2Seed sets the optional seed attribute to value. +// +// value: If either `seed` or `seed2` are set to non-zero, the random number +// generator is seeded by the given `seed`. Otherwise, it is seeded by a random +// seed. +// If not specified, defaults to 0 +func SampleDistortedBoundingBoxV2Seed(value int64) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { - m["container"] = value + m["seed"] = value } } -// ExperimentalStatsAggregatorHandleSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func ExperimentalStatsAggregatorHandleSharedName(value string) ExperimentalStatsAggregatorHandleAttr { +// SampleDistortedBoundingBoxV2Seed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func SampleDistortedBoundingBoxV2Seed2(value int64) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { - m["shared_name"] = value + m["seed2"] = value } } -// Creates a statistics manager resource. -func ExperimentalStatsAggregatorHandle(scope *Scope, optional ...ExperimentalStatsAggregatorHandleAttr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) +// SampleDistortedBoundingBoxV2AspectRatioRange sets the optional aspect_ratio_range attribute to value. +// +// value: The cropped area of the image must have an aspect ratio = +// width / height within this range. 
+// If not specified, defaults to <f:0.75 f:1.33 > +func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistortedBoundingBoxV2Attr { + return func(m optionalAttr) { + m["aspect_ratio_range"] = value } - opspec := tf.OpSpec{ - Type: "ExperimentalStatsAggregatorHandle", +} - Attrs: attrs, +// SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value. +// +// value: The cropped area of the image must contain a fraction of the +// supplied image within this range. +// If not specified, defaults to <f:0.05 f:1 > +func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr { + return func(m optionalAttr) { + m["area_range"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// A container for an iterator resource. +// SampleDistortedBoundingBoxV2MaxAttempts sets the optional max_attempts attribute to value. // -// Returns A handle to the iterator that can be passed to a "MakeIterator" or -// "IteratorGetNext" op. In contrast to Iterator, AnonymousIterator prevents -// resource sharing by name, and does not keep a reference to the resource -// container. -func AnonymousIterator(scope *Scope, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { - if scope.Err() != nil { - return +// value: Number of attempts at generating a cropped region of the image +// of the specified constraints. After `max_attempts` failures, return the entire +// image. +// If not specified, defaults to 100 +func SampleDistortedBoundingBoxV2MaxAttempts(value int64) SampleDistortedBoundingBoxV2Attr { + return func(m optionalAttr) { + m["max_attempts"] = value } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "AnonymousIterator", +} - Attrs: attrs, +// SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes sets the optional use_image_if_no_bounding_boxes attribute to value.
+// +// value: Controls behavior if no bounding boxes supplied. +// If true, assume an implicit bounding box covering the whole input. If false, +// raise an error. +// If not specified, defaults to false +func SampleDistortedBoundingBoxV2UseImageIfNoBoundingBoxes(value bool) SampleDistortedBoundingBoxV2Attr { + return func(m optionalAttr) { + m["use_image_if_no_bounding_boxes"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// BatchToSpace for 4-D tensors of type T. -// -// This is a legacy version of the more general BatchToSpaceND. -// -// Rearranges (permutes) data from batch into blocks of spatial data, followed by -// cropping. This is the reverse transformation of SpaceToBatch. More specifically, -// this op outputs a copy of the input tensor where values from the `batch` -// dimension are moved in spatial blocks to the `height` and `width` dimensions, -// followed by cropping along the `height` and `width` dimensions. -// -// Arguments: -// input: 4-D tensor with shape -// `[batch*block_size*block_size, height_pad/block_size, width_pad/block_size, -// depth]`. Note that the batch size of the input tensor must be divisible by -// `block_size * block_size`. -// crops: 2-D tensor of non-negative integers with shape `[2, 2]`. It specifies -// how many elements to crop from the intermediate result across the spatial -// dimensions as follows: -// -// crops = [[crop_top, crop_bottom], [crop_left, crop_right]] -// -// -// Returns 4-D with shape `[batch, height, width, depth]`, where: -// -// height = height_pad - crop_top - crop_bottom -// width = width_pad - crop_left - crop_right -// -// The attr `block_size` must be greater than one. It indicates the block size. 
-// -// Some examples: -// -// (1) For the following input of shape `[4, 1, 1, 1]` and block_size of 2: -// -// ``` -// [[[[1]]], [[[2]]], [[[3]]], [[[4]]]] -// ``` -// -// The output tensor has shape `[1, 2, 2, 1]` and value: -// -// ``` -// x = [[[[1], [2]], [[3], [4]]]] -// ``` -// -// (2) For the following input of shape `[4, 1, 1, 3]` and block_size of 2: +// Generate a single randomly distorted bounding box for an image. // -// ``` -// [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]] -// ``` +// Bounding box annotations are often supplied in addition to ground-truth labels +// in image recognition or object localization tasks. A common technique for +// training such a system is to randomly distort an image while preserving +// its content, i.e. *data augmentation*. This Op outputs a randomly distorted +// localization of an object, i.e. bounding box, given an `image_size`, +// `bounding_boxes` and a series of constraints. // -// The output tensor has shape `[1, 2, 2, 3]` and value: +// The output of this Op is a single bounding box that may be used to crop the +// original image. The output is returned as 3 tensors: `begin`, `size` and +// `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the +// image. The latter may be supplied to `tf.image.draw_bounding_boxes` to visualize +// what the bounding box looks like. // -// ``` -// x = [[[[1, 2, 3], [4, 5, 6]], -// [[7, 8, 9], [10, 11, 12]]]] -// ``` +// Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`. The +// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and +// height of the underlying image. // -// (3) For the following input of shape `[4, 2, 2, 1]` and block_size of 2: +// For example, // -// ``` -// x = [[[[1], [3]], [[9], [11]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// [[[6], [8]], [[14], [16]]]] -// ``` +// ```python +// # Generate a single distorted bounding box. 
+// begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box( +// tf.shape(image), +// bounding_boxes=bounding_boxes) // -// The output tensor has shape `[1, 4, 4, 1]` and value: +// # Draw the bounding box in an image summary. +// image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), +// bbox_for_draw) +// tf.summary.image('images_with_box', image_with_box) // +// # Employ the bounding box to distort the image. +// distorted_image = tf.slice(image, begin, size) // ``` -// x = [[[1], [2], [3], [4]], -// [[5], [6], [7], [8]], -// [[9], [10], [11], [12]], -// [[13], [14], [15], [16]]] -// ``` -// -// (4) For the following input of shape `[8, 1, 2, 1]` and block_size of 2: // -// ``` -// x = [[[[1], [3]]], [[[9], [11]]], [[[2], [4]]], [[[10], [12]]], -// [[[5], [7]]], [[[13], [15]]], [[[6], [8]]], [[[14], [16]]]] -// ``` +// Note that if no bounding box information is available, setting +// `use_image_if_no_bounding_boxes = true` will assume there is a single implicit +// bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is +// false and no bounding boxes are supplied, an error is raised. // -// The output tensor has shape `[2, 2, 4, 1]` and value: +// Arguments: +// image_size: 1-D, containing `[height, width, channels]`. +// bounding_boxes: 3-D with shape `[batch, N, 4]` describing the N bounding boxes +// associated with the image. +// min_object_covered: The cropped area of the image must contain at least this +// fraction of any bounding box supplied. The value of this parameter should be +// non-negative. In the case of 0, the cropped area does not need to overlap +// any of the bounding boxes supplied. 
// -// ``` -// x = [[[[1], [3]], [[5], [7]]], -// [[[2], [4]], [[10], [12]]], -// [[[5], [7]], [[13], [15]]], -// [[[6], [8]], [[14], [16]]]] -// ``` -func BatchToSpace(scope *Scope, input tf.Output, crops tf.Output, block_size int64) (output tf.Output) { +// Returns 1-D, containing `[offset_height, offset_width, 0]`. Provide as input to +// `tf.slice`.1-D, containing `[target_height, target_width, -1]`. Provide as input to +// `tf.slice`.3-D with shape `[1, 1, 4]` containing the distorted bounding box. +// Provide as input to `tf.image.draw_bounding_boxes`. +func SampleDistortedBoundingBoxV2(scope *Scope, image_size tf.Output, bounding_boxes tf.Output, min_object_covered tf.Output, optional ...SampleDistortedBoundingBoxV2Attr) (begin tf.Output, size tf.Output, bboxes tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"block_size": block_size} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "BatchToSpace", + Type: "SampleDistortedBoundingBoxV2", Input: []tf.Input{ - input, crops, + image_size, bounding_boxes, min_object_covered, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2) } -// Produces a summary of any statistics recorded by the given statistics manager. -func ExperimentalStatsAggregatorSummary(scope *Scope, iterator tf.Output) (summary tf.Output) { +// Computes requantization range per channel. +// +// Arguments: +// input: The original input tensor. +// input_min: The minimum value of the input tensor +// input_max: The maximum value of the input tensor. +// clip_value_max: The maximum value of the output that needs to be clipped. +// Example: set this to 6 for Relu6. +// +// Returns The minimum value of the final output tensorThe maximum value of the final output tensor. 
+func RequantizationRangePerChannel(scope *Scope, input tf.Output, input_min tf.Output, input_max tf.Output, clip_value_max float32) (output_min tf.Output, output_max tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"clip_value_max": clip_value_max} opspec := tf.OpSpec{ - Type: "ExperimentalStatsAggregatorSummary", + Type: "RequantizationRangePerChannel", Input: []tf.Input{ - iterator, + input, input_min, input_max, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Makes a new iterator from the given `dataset` and stores it in `iterator`. -// -// This operation may be executed multiple times. Each execution will reset the -// iterator in `iterator` to the first element of `dataset`. +// ExtractGlimpseAttr is an optional argument to ExtractGlimpse. +type ExtractGlimpseAttr func(optionalAttr) + +// ExtractGlimpseCentered sets the optional centered attribute to value. // -// Returns the created operation. -func MakeIterator(scope *Scope, dataset tf.Output, iterator tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "MakeIterator", - Input: []tf.Input{ - dataset, iterator, - }, +// value: indicates if the offset coordinates are centered relative to +// the image, in which case the (0, 0) offset is relative to the center +// of the input images. If false, the (0,0) offset corresponds to the +// upper left corner of the input images. +// If not specified, defaults to true +func ExtractGlimpseCentered(value bool) ExtractGlimpseAttr { + return func(m optionalAttr) { + m["centered"] = value } - return scope.AddOperation(opspec) } -// Adjust the contrast of one or more images. -// -// `images` is a tensor of at least 3 dimensions. The last 3 dimensions are -// interpreted as `[height, width, channels]`. 
The other dimensions only -// represent a collection of images, such as `[batch, height, width, channels].` -// -// Contrast is adjusted independently for each channel of each image. -// -// For each channel, the Op first computes the mean of the image pixels in the -// channel and then adjusts each component of each pixel to -// `(x - mean) * contrast_factor + mean`. -// -// Arguments: -// images: Images to adjust. At least 3-D. -// contrast_factor: A float multiplier for adjusting contrast. +// ExtractGlimpseNormalized sets the optional normalized attribute to value. // -// Returns The contrast-adjusted image or images. -func AdjustContrastv2(scope *Scope, images tf.Output, contrast_factor tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "AdjustContrastv2", - Input: []tf.Input{ - images, contrast_factor, - }, +// value: indicates if the offset coordinates are normalized. +// If not specified, defaults to true +func ExtractGlimpseNormalized(value bool) ExtractGlimpseAttr { + return func(m optionalAttr) { + m["normalized"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Gets the next output from the given iterator . -func IteratorGetNext(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} - opspec := tf.OpSpec{ - Type: "IteratorGetNext", - Input: []tf.Input{ - iterator, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return +// ExtractGlimpseUniformNoise sets the optional uniform_noise attribute to value. +// +// value: indicates if the noise should be generated using a +// uniform distribution or a Gaussian distribution. 
+// If not specified, defaults to true +func ExtractGlimpseUniformNoise(value bool) ExtractGlimpseAttr { + return func(m optionalAttr) { + m["uniform_noise"] = value } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("IteratorGetNext", err) - return +} + +// ExtractGlimpseNoise sets the optional noise attribute to value. +// +// value: indicates if the noise should `uniform`, `gaussian`, or +// `zero`. The default is `uniform` which means the the noise type +// will be decided by `uniform_noise`. +// If not specified, defaults to "uniform" +func ExtractGlimpseNoise(value string) ExtractGlimpseAttr { + return func(m optionalAttr) { + m["noise"] = value } - return components } -// Outputs the single element from the given dataset. +// Extracts a glimpse from the input tensor. // -// Arguments: -// dataset: A handle to a dataset that contains a single element. +// Returns a set of windows called glimpses extracted at location +// `offsets` from the input tensor. If the windows only partially +// overlaps the inputs, the non overlapping areas will be filled with +// random noise. // +// The result is a 4-D tensor of shape `[batch_size, glimpse_height, +// glimpse_width, channels]`. The channels and batch dimensions are the +// same as that of the input tensor. The height and width of the output +// windows are specified in the `size` parameter. // +// The argument `normalized` and `centered` controls how the windows are built: // -// Returns The components of the single element of `input`. -func DatasetToSingleElement(scope *Scope, dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { +// * If the coordinates are normalized but not centered, 0.0 and 1.0 +// correspond to the minimum and maximum of each height and width +// dimension. +// * If the coordinates are both normalized and centered, they range from +// -1.0 to 1.0. 
The coordinates (-1.0, -1.0) correspond to the upper +// left corner, the lower right corner is located at (1.0, 1.0) and the +// center is at (0, 0). +// * If the coordinates are not normalized they are interpreted as +// numbers of pixels. +// +// Arguments: +// input: A 4-D float tensor of shape `[batch_size, height, width, channels]`. +// size: A 1-D tensor of 2 elements containing the size of the glimpses +// to extract. The glimpse height must be specified first, following +// by the glimpse width. +// offsets: A 2-D integer tensor of shape `[batch_size, 2]` containing +// the y, x locations of the center of each window. +// +// Returns A tensor representing the glimpses `[batch_size, +// glimpse_height, glimpse_width, channels]`. +func ExtractGlimpse(scope *Scope, input tf.Output, size tf.Output, offsets tf.Output, optional ...ExtractGlimpseAttr) (glimpse tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "DatasetToSingleElement", + Type: "ExtractGlimpse", Input: []tf.Input{ - dataset, + input, size, offsets, }, Attrs: attrs, } op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("DatasetToSingleElement", err) - return - } - return components + return op.Output(0) } -// Converts the given `resource_handle` representing an iterator to a string. -// -// Arguments: -// resource_handle: A handle to an iterator resource. +// A container for an iterator resource. // -// Returns A string representation of the given handle. -func IteratorToStringHandle(scope *Scope, resource_handle tf.Output) (string_handle tf.Output) { +// Returns A handle to the iterator that can be passed to a "MakeIterator" +// or "IteratorGetNext" op. 
+func Iterator(scope *Scope, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "IteratorToStringHandle", - Input: []tf.Input{ - resource_handle, - }, + Type: "Iterator", + + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// IteratorFromStringHandleAttr is an optional argument to IteratorFromStringHandle. -type IteratorFromStringHandleAttr func(optionalAttr) +// TensorForestTreeResourceHandleOpAttr is an optional argument to TensorForestTreeResourceHandleOp. +type TensorForestTreeResourceHandleOpAttr func(optionalAttr) -// IteratorFromStringHandleOutputTypes sets the optional output_types attribute to value. -// -// value: If specified, defines the type of each tuple component in an -// element produced by the resulting iterator. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func IteratorFromStringHandleOutputTypes(value []tf.DataType) IteratorFromStringHandleAttr { +// TensorForestTreeResourceHandleOpContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func TensorForestTreeResourceHandleOpContainer(value string) TensorForestTreeResourceHandleOpAttr { return func(m optionalAttr) { - m["output_types"] = value + m["container"] = value } } -// IteratorFromStringHandleOutputShapes sets the optional output_shapes attribute to value. -// -// value: If specified, defines the shape of each tuple component in an -// element produced by the resulting iterator. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func IteratorFromStringHandleOutputShapes(value []tf.Shape) IteratorFromStringHandleAttr { +// TensorForestTreeResourceHandleOpSharedName sets the optional shared_name attribute to value. 
+// If not specified, defaults to "" +func TensorForestTreeResourceHandleOpSharedName(value string) TensorForestTreeResourceHandleOpAttr { return func(m optionalAttr) { - m["output_shapes"] = value + m["shared_name"] = value } } -// Converts the given string representing a handle to an iterator to a resource. -// -// Arguments: -// string_handle: A string representation of the given handle. -// -// Returns A handle to an iterator resource. -func IteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ...IteratorFromStringHandleAttr) (resource_handle tf.Output) { +// Creates a handle to a TensorForestTreeResource +func TensorForestTreeResourceHandleOp(scope *Scope, optional ...TensorForestTreeResourceHandleOpAttr) (resource tf.Output) { if scope.Err() != nil { return } @@ -33994,416 +36743,441 @@ func IteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional .. a(attrs) } opspec := tf.OpSpec{ - Type: "IteratorFromStringHandle", - Input: []tf.Input{ - string_handle, - }, + Type: "TensorForestTreeResourceHandleOp", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Gather slices from `params` axis `axis` according to `indices`. -// -// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). -// Produces an output tensor with shape `params.shape[:axis] + indices.shape + -// params.shape[axis + 1:]` where: -// -// ```python -// # Scalar indices (output is rank(params) - 1). -// output[a_0, ..., a_n, b_0, ..., b_n] = -// params[a_0, ..., a_n, indices, b_0, ..., b_n] -// -// # Vector indices (output is rank(params)). -// output[a_0, ..., a_n, i, b_0, ..., b_n] = -// params[a_0, ..., a_n, indices[i], b_0, ..., b_n] -// -// # Higher rank indices (output is rank(params) + rank(indices) - 1). -// output[a_0, ..., a_n, i, ..., j, b_0, ... b_n] = -// params[a_0, ..., a_n, indices[i, ..., j], b_0, ..., b_n] -// ``` -// -//
-// -//
-// -// Note that on CPU, if an out of bound index is found, an error is returned. -// On GPU, if an out of bound index is found, a 0 is stored in the -// corresponding output value. -// -// See also `tf.batch_gather` and `tf.gather_nd`. -// -// Arguments: -// params: The tensor from which to gather values. Must be at least rank -// `axis + 1`. -// indices: Index tensor. Must be in range `[0, params.shape[axis])`. -// axis: The axis in `params` to gather `indices` from. Defaults to the first -// dimension. Supports negative indexes. +// CropAndResizeGradImageAttr is an optional argument to CropAndResizeGradImage. +type CropAndResizeGradImageAttr func(optionalAttr) + +// CropAndResizeGradImageMethod sets the optional method attribute to value. // -// Returns Values from `params` gathered from indices given by `indices`, with -// shape `params.shape[:axis] + indices.shape + params.shape[axis + 1:]`. -func GatherV2(scope *Scope, params tf.Output, indices tf.Output, axis tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "GatherV2", - Input: []tf.Input{ - params, indices, axis, - }, +// value: A string specifying the interpolation method. Only 'bilinear' is +// supported for now. +// If not specified, defaults to "bilinear" +func CropAndResizeGradImageMethod(value string) CropAndResizeGradImageAttr { + return func(m optionalAttr) { + m["method"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// Converts the given `resource_handle` representing an iterator to a variant tensor. +// Computes the gradient of the crop_and_resize op wrt the input image tensor. // // Arguments: -// resource_handle: A handle to an iterator resource. +// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. +// boxes: A 2-D tensor of shape `[num_boxes, 4]`. 
The `i`-th row of the tensor +// specifies the coordinates of a box in the `box_ind[i]` image and is specified +// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of +// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the +// `[0, 1]` interval of normalized image height is mapped to +// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in +// which case the sampled crop is an up-down flipped version of the original +// image. The width dimension is treated similarly. Normalized coordinates +// outside the `[0, 1]` range are allowed, in which case we use +// `extrapolation_value` to extrapolate the input image values. +// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. +// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. +// image_size: A 1-D tensor with value `[batch, image_height, image_width, depth]` +// containing the original image size. Both `image_height` and `image_width` need +// to be positive. // -// Returns A variant tensor storing the state of the iterator contained in the -// resource. -func SerializeIterator(scope *Scope, resource_handle tf.Output) (serialized tf.Output) { +// +// Returns A 4-D tensor of shape `[batch, image_height, image_width, depth]`. +func CropAndResizeGradImage(scope *Scope, grads tf.Output, boxes tf.Output, box_ind tf.Output, image_size tf.Output, T tf.DataType, optional ...CropAndResizeGradImageAttr) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"T": T} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "SerializeIterator", + Type: "CropAndResizeGradImage", Input: []tf.Input{ - resource_handle, + grads, boxes, box_ind, image_size, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// FIFOQueueV2Attr is an optional argument to FIFOQueueV2. 
-type FIFOQueueV2Attr func(optionalAttr) - -// FIFOQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. If the length of -// this attr is 0, the shapes of queue elements are not constrained, and -// only one element may be dequeued at a time. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func FIFOQueueV2Shapes(value []tf.Shape) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} +// ShuffleDatasetAttr is an optional argument to ShuffleDataset. +type ShuffleDatasetAttr func(optionalAttr) -// FIFOQueueV2Capacity sets the optional capacity attribute to value. +// ShuffleDatasetReshuffleEachIteration sets the optional reshuffle_each_iteration attribute to value. // -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func FIFOQueueV2Capacity(value int64) FIFOQueueV2Attr { +// value: If true, each iterator over this dataset will be given +// a different pseudorandomly generated seed, based on a sequence seeded by the +// `seed` and `seed2` inputs. If false, each iterator will be given the same +// seed, and repeated iteration over this dataset will yield the exact same +// sequence of results. +// If not specified, defaults to true +func ShuffleDatasetReshuffleEachIteration(value bool) ShuffleDatasetAttr { return func(m optionalAttr) { - m["capacity"] = value + m["reshuffle_each_iteration"] = value } } -// FIFOQueueV2Container sets the optional container attribute to value. +// Creates a dataset that shuffles elements from `input_dataset` pseudorandomly. // -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. 
-// If not specified, defaults to "" -func FIFOQueueV2Container(value string) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// FIFOQueueV2SharedName sets the optional shared_name attribute to value. +// Arguments: // -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func FIFOQueueV2SharedName(value string) FIFOQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that produces elements in first-in first-out order. +// buffer_size: The number of output elements to buffer in an iterator over +// this dataset. Compare with the `min_after_dequeue` attr when creating a +// `RandomShuffleQueue`. +// seed: A scalar seed for the random number generator. If either `seed` or +// `seed2` is set to be non-zero, the random number generator is seeded +// by the given seed. Otherwise, a random seed is used. +// seed2: A second scalar seed to avoid seed collision. // -// Arguments: -// component_types: The type of each component in a value. // -// Returns The handle to the queue. 
-func FIFOQueueV2(scope *Scope, component_types []tf.DataType, optional ...FIFOQueueV2Attr) (handle tf.Output) { +func ShuffleDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape, optional ...ShuffleDatasetAttr) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"component_types": component_types} + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FIFOQueueV2", - + Type: "ShuffleDataset", + Input: []tf.Input{ + input_dataset, buffer_size, seed, seed2, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Deserializes a proto into the tree handle +// 3D fast Fourier transform. +// +// Computes the 3-dimensional discrete Fourier transform over the inner-most 3 +// dimensions of `input`. // // Arguments: -// tree_handle: Handle to the tree resource to be restored. -// tree_config: Serialied proto string of the boosted_trees.Tree proto. +// input: A complex64 tensor. // -// Returns the created operation. -func TensorForestTreeDeserialize(scope *Scope, tree_handle tf.Output, tree_config tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "TensorForestTreeDeserialize", - Input: []tf.Input{ - tree_handle, tree_config, - }, - } - return scope.AddOperation(opspec) -} - -// Constructs an Optional variant from a tuple of tensors. -func OptionalFromValue(scope *Scope, components []tf.Output) (optional tf.Output) { +// Returns A complex64 tensor of the same shape as `input`. The inner-most 3 +// dimensions of `input` are replaced with their 3D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.fftn with 3 dimensions. 
+// @end_compatibility +func FFT3D(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "OptionalFromValue", + Type: "FFT3D", Input: []tf.Input{ - tf.OutputList(components), + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// DecodeProtoV2Attr is an optional argument to DecodeProtoV2. -type DecodeProtoV2Attr func(optionalAttr) - -// DecodeProtoV2DescriptorSource sets the optional descriptor_source attribute to value. -// -// value: Either the special value `local://` or a path to a file containing -// a serialized `FileDescriptorSet`. -// If not specified, defaults to "local://" -func DecodeProtoV2DescriptorSource(value string) DecodeProtoV2Attr { - return func(m optionalAttr) { - m["descriptor_source"] = value - } -} - -// DecodeProtoV2MessageFormat sets the optional message_format attribute to value. -// -// value: Either `binary` or `text`. -// If not specified, defaults to "binary" -func DecodeProtoV2MessageFormat(value string) DecodeProtoV2Attr { - return func(m optionalAttr) { - m["message_format"] = value - } -} +// CropAndResizeGradBoxesAttr is an optional argument to CropAndResizeGradBoxes. +type CropAndResizeGradBoxesAttr func(optionalAttr) -// DecodeProtoV2Sanitize sets the optional sanitize attribute to value. +// CropAndResizeGradBoxesMethod sets the optional method attribute to value. // -// value: Whether to sanitize the result or not. -// If not specified, defaults to false -func DecodeProtoV2Sanitize(value bool) DecodeProtoV2Attr { +// value: A string specifying the interpolation method. Only 'bilinear' is +// supported for now. +// If not specified, defaults to "bilinear" +func CropAndResizeGradBoxesMethod(value string) CropAndResizeGradBoxesAttr { return func(m optionalAttr) { - m["sanitize"] = value + m["method"] = value } } -// The op extracts fields from a serialized protocol buffers message into tensors. 
-// -// The `decode_proto` op extracts fields from a serialized protocol buffers -// message into tensors. The fields in `field_names` are decoded and converted -// to the corresponding `output_types` if possible. -// -// A `message_type` name must be provided to give context for the field -// names. The actual message descriptor can be looked up either in the -// linked-in descriptor pool or a filename provided by the caller using -// the `descriptor_source` attribute. -// -// Each output tensor is a dense tensor. This means that it is padded to -// hold the largest number of repeated elements seen in the input -// minibatch. (The shape is also padded by one to prevent zero-sized -// dimensions). The actual repeat counts for each example in the -// minibatch can be found in the `sizes` output. In many cases the output -// of `decode_proto` is fed immediately into tf.squeeze if missing values -// are not a concern. When using tf.squeeze, always pass the squeeze -// dimension explicitly to avoid surprises. -// -// For the most part, the mapping between Proto field types and -// TensorFlow dtypes is straightforward. However, there are a few -// special cases: -// -// - A proto field that contains a submessage or group can only be converted -// to `DT_STRING` (the serialized submessage). This is to reduce the -// complexity of the API. The resulting string can be used as input -// to another instance of the decode_proto op. -// -// - TensorFlow lacks support for unsigned integers. The ops represent uint64 -// types as a `DT_INT64` with the same twos-complement bit pattern -// (the obvious way). Unsigned int32 values can be represented exactly by -// specifying type `DT_INT64`, or using twos-complement if the caller -// specifies `DT_INT32` in the `output_types` attribute. -// -// The `descriptor_source` attribute selects a source of protocol -// descriptors to consult when looking up `message_type`. 
This may be a -// filename containing a serialized `FileDescriptorSet` message, -// or the special value `local://`, in which case only descriptors linked -// into the code will be searched; the filename can be on any filesystem -// accessible to TensorFlow. -// -// You can build a `descriptor_source` file using the `--descriptor_set_out` -// and `--include_imports` options to the protocol compiler `protoc`. -// -// The `local://` database only covers descriptors linked into the -// code via C++ libraries, not Python imports. You can link in a proto descriptor -// by creating a cc_library target with alwayslink=1. -// -// Both binary and text proto serializations are supported, and can be -// chosen using the `format` attribute. +// Computes the gradient of the crop_and_resize op wrt the input boxes tensor. // // Arguments: -// bytes: Tensor of serialized protos with shape `batch_shape`. -// message_type: Name of the proto message type to decode. -// field_names: List of strings containing proto field names. An extension field can be decoded -// by using its full name, e.g. EXT_PACKAGE.EXT_FIELD_NAME. -// output_types: List of TF types to use for the respective field in field_names. +// grads: A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`. +// image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`. +// Both `image_height` and `image_width` need to be positive. +// boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor +// specifies the coordinates of a box in the `box_ind[i]` image and is specified +// in normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value of +// `y` is mapped to the image coordinate at `y * (image_height - 1)`, so as the +// `[0, 1]` interval of normalized image height is mapped to +// `[0, image_height - 1] in image height coordinates. We do allow y1 > y2, in +// which case the sampled crop is an up-down flipped version of the original +// image. 
The width dimension is treated similarly. Normalized coordinates +// outside the `[0, 1]` range are allowed, in which case we use +// `extrapolation_value` to extrapolate the input image values. +// box_ind: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`. +// The value of `box_ind[i]` specifies the image that the `i`-th box refers to. // -// Returns Tensor of int32 with shape `[batch_shape, len(field_names)]`. -// Each entry is the number of values found for the corresponding field. -// Optional fields may have 0 or 1 values.List of tensors containing values for the corresponding field. -// `values[i]` has datatype `output_types[i]` -// and shape `[batch_shape, max(sizes[...,i])]`. -func DecodeProtoV2(scope *Scope, bytes tf.Output, message_type string, field_names []string, output_types []tf.DataType, optional ...DecodeProtoV2Attr) (sizes tf.Output, values []tf.Output) { +// Returns A 2-D tensor of shape `[num_boxes, 4]`. +func CropAndResizeGradBoxes(scope *Scope, grads tf.Output, image tf.Output, boxes tf.Output, box_ind tf.Output, optional ...CropAndResizeGradBoxesAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"message_type": message_type, "field_names": field_names, "output_types": output_types} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "DecodeProtoV2", + Type: "CropAndResizeGradBoxes", Input: []tf.Input{ - bytes, + grads, image, boxes, box_ind, }, Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Greedily selects a subset of bounding boxes in descending order of score, +// +// pruning away boxes that have high intersection-over-union (IOU) overlap +// with previously selected boxes. Bounding boxes with score less than +// `score_threshold` are removed. 
Bounding boxes are supplied as +// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any +// diagonal pair of box corners and the coordinates can be provided as normalized +// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm +// is agnostic to where the origin is in the coordinate system and more +// generally is invariant to orthogonal transformations and translations +// of the coordinate system; thus translating or reflections of the coordinate +// system result in the same boxes being selected by the algorithm. +// The output of this operation is a set of integers indexing into the input +// collection of bounding boxes representing the selected boxes. The bounding +// box coordinates corresponding to the selected indices can then be obtained +// using the `tf.gather operation`. For example: +// selected_indices = tf.image.non_max_suppression_v2( +// boxes, scores, max_output_size, iou_threshold, score_threshold) +// selected_boxes = tf.gather(boxes, selected_indices) +// +// Arguments: +// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. +// scores: A 1-D float tensor of shape `[num_boxes]` representing a single +// score corresponding to each box (each row of boxes). +// max_output_size: A scalar integer tensor representing the maximum number of +// boxes to be selected by non max suppression. +// iou_threshold: A 0-D float tensor representing the threshold for deciding whether +// boxes overlap too much with respect to IOU. +// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove +// boxes based on score. +// +// Returns A 1-D integer tensor of shape `[M]` representing the selected +// indices from the boxes tensor, where `M <= max_output_size`. 
+func NonMaxSuppressionV3(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output, score_threshold tf.Output) (selected_indices tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - sizes = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("DecodeProtoV2", err) - return + opspec := tf.OpSpec{ + Type: "NonMaxSuppressionV3", + Input: []tf.Input{ + boxes, scores, max_output_size, iou_threshold, score_threshold, + }, } - return sizes, values + op := scope.AddOperation(opspec) + return op.Output(0) } -// Creates an Optional variant with no value. -func OptionalNone(scope *Scope) (optional tf.Output) { +// NonMaxSuppressionV4Attr is an optional argument to NonMaxSuppressionV4. +type NonMaxSuppressionV4Attr func(optionalAttr) + +// NonMaxSuppressionV4PadToMaxOutputSize sets the optional pad_to_max_output_size attribute to value. +// +// value: If true, the output `selected_indices` is padded to be of length +// `max_output_size`. Defaults to false. +// If not specified, defaults to false +func NonMaxSuppressionV4PadToMaxOutputSize(value bool) NonMaxSuppressionV4Attr { + return func(m optionalAttr) { + m["pad_to_max_output_size"] = value + } +} + +// Greedily selects a subset of bounding boxes in descending order of score, +// +// pruning away boxes that have high intersection-over-union (IOU) overlap +// with previously selected boxes. Bounding boxes with score less than +// `score_threshold` are removed. Bounding boxes are supplied as +// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any +// diagonal pair of box corners and the coordinates can be provided as normalized +// (i.e., lying in the interval [0, 1]) or absolute. 
Note that this algorithm +// is agnostic to where the origin is in the coordinate system and more +// generally is invariant to orthogonal transformations and translations +// of the coordinate system; thus translating or reflections of the coordinate +// system result in the same boxes being selected by the algorithm. +// The output of this operation is a set of integers indexing into the input +// collection of bounding boxes representing the selected boxes. The bounding +// box coordinates corresponding to the selected indices can then be obtained +// using the `tf.gather operation`. For example: +// selected_indices = tf.image.non_max_suppression_v2( +// boxes, scores, max_output_size, iou_threshold, score_threshold) +// selected_boxes = tf.gather(boxes, selected_indices) +// +// Arguments: +// boxes: A 2-D float tensor of shape `[num_boxes, 4]`. +// scores: A 1-D float tensor of shape `[num_boxes]` representing a single +// score corresponding to each box (each row of boxes). +// max_output_size: A scalar integer tensor representing the maximum number of +// boxes to be selected by non max suppression. +// iou_threshold: A 0-D float tensor representing the threshold for deciding whether +// boxes overlap too much with respect to IOU. +// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove +// boxes based on score. +// +// Returns A 1-D integer tensor of shape `[M]` representing the selected +// indices from the boxes tensor, where `M <= max_output_size`.A 0-D integer tensor representing the number of valid elements in +// `selected_indices`, with the valid elements appearing first. 
+func NonMaxSuppressionV4(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size tf.Output, iou_threshold tf.Output, score_threshold tf.Output, optional ...NonMaxSuppressionV4Attr) (selected_indices tf.Output, valid_outputs tf.Output) { if scope.Err() != nil { return } - opspec := tf.OpSpec{ - Type: "OptionalNone", - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns true if and only if the given Optional variant has a value. -func OptionalHasValue(scope *Scope, optional tf.Output) (has_value tf.Output) { - if scope.Err() != nil { - return + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) } opspec := tf.OpSpec{ - Type: "OptionalHasValue", + Type: "NonMaxSuppressionV4", Input: []tf.Input{ - optional, + boxes, scores, max_output_size, iou_threshold, score_threshold, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1) } -// Returns the value stored in an Optional variant or raises an error if none exists. -func OptionalGetValue(scope *Scope, optional tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { +// Removes keys and its associated values from a table. +// +// The tensor `keys` must of the same type as the keys of the table. Keys not +// already in the table are silently ignored. +// +// Arguments: +// table_handle: Handle to the table. +// keys: Any shape. Keys of the elements to remove. +// +// Returns the created operation. 
+func LookupTableRemoveV2(scope *Scope, table_handle tf.Output, keys tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "OptionalGetValue", + Type: "LookupTableRemoveV2", Input: []tf.Input{ - optional, + table_handle, keys, }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("OptionalGetValue", err) - return + return scope.AddOperation(opspec) +} + +// CombinedNonMaxSuppressionAttr is an optional argument to CombinedNonMaxSuppression. +type CombinedNonMaxSuppressionAttr func(optionalAttr) + +// CombinedNonMaxSuppressionPadPerClass sets the optional pad_per_class attribute to value. +// +// value: If false, the output nmsed boxes, scores and classes +// are padded/clipped to `max_total_size`. If true, the +// output nmsed boxes, scores and classes are padded to be of length +// `max_size_per_class`*`num_classes`, unless it exceeds `max_total_size` in +// which case it is clipped to `max_total_size`. Defaults to false. +// If not specified, defaults to false +func CombinedNonMaxSuppressionPadPerClass(value bool) CombinedNonMaxSuppressionAttr { + return func(m optionalAttr) { + m["pad_per_class"] = value } - return components } -// Gets the next output from the given iterator as an Optional variant. -func IteratorGetNextAsOptional(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (optional tf.Output) { +// Greedily selects a subset of bounding boxes in descending order of score, +// +// This operation performs non_max_suppression on the inputs per batch, across +// all classes. +// Prunes away boxes that have high intersection-over-union (IOU) overlap +// with previously selected boxes. 
Bounding boxes are supplied as +// [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any +// diagonal pair of box corners and the coordinates can be provided as normalized +// (i.e., lying in the interval [0, 1]) or absolute. Note that this algorithm +// is agnostic to where the origin is in the coordinate system. Also note that +// this algorithm is invariant to orthogonal transformations and translations +// of the coordinate system; thus translating or reflections of the coordinate +// system result in the same boxes being selected by the algorithm. +// The output of this operation is the final boxes, scores and classes tensor +// returned after performing non_max_suppression. +// +// Arguments: +// boxes: A 4-D float tensor of shape `[batch_size, num_boxes, q, 4]`. If `q` is 1 then +// same boxes are used for all classes otherwise, if `q` is equal to number of +// classes, class-specific boxes are used. +// scores: A 3-D float tensor of shape `[batch_size, num_boxes, num_classes]` +// representing a single score corresponding to each box (each row of boxes). +// max_output_size_per_class: A scalar integer tensor representing the maximum number of +// boxes to be selected by non max suppression per class +// max_total_size: A scalar representing maximum number of boxes retained over all classes. +// iou_threshold: A 0-D float tensor representing the threshold for deciding whether +// boxes overlap too much with respect to IOU. +// score_threshold: A 0-D float tensor representing the threshold for deciding when to remove +// boxes based on score. +// +// Returns A [batch_size, max_detections, 4] float32 tensor +// containing the non-max suppressed boxes.A [batch_size, max_detections] float32 tensor +// containing the scores for the boxes.A [batch_size, max_detections] float32 tensor +// containing the classes for the boxes.A [batch_size] int32 tensor indicating the number of +// valid detections per batch item. 
Only the top num_detections[i] entries in +// nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the +// entries are zero paddings. +func CombinedNonMaxSuppression(scope *Scope, boxes tf.Output, scores tf.Output, max_output_size_per_class tf.Output, max_total_size tf.Output, iou_threshold tf.Output, score_threshold tf.Output, optional ...CombinedNonMaxSuppressionAttr) (nmsed_boxes tf.Output, nmsed_scores tf.Output, nmsed_classes tf.Output, valid_detections tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "IteratorGetNextAsOptional", + Type: "CombinedNonMaxSuppression", Input: []tf.Input{ - iterator, + boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, score_threshold, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + return op.Output(0), op.Output(1), op.Output(2), op.Output(3) } -// Fast Fourier transform. +// Computes the matrix logarithm of one or more square matrices: // -// Computes the 1-dimensional discrete Fourier transform over the inner-most -// dimension of `input`. +// +// \\(log(exp(A)) = A\\) +// +// This op is only defined for complex matrices. If A is positive-definite and +// real, then casting to a complex matrix, taking the logarithm and casting back +// to a real matrix will give the correct result. +// +// This function computes the matrix logarithm using the Schur-Parlett algorithm. +// Details of the algorithm can be found in Section 11.6.2 of: +// Nicholas J. Higham, Functions of Matrices: Theory and Computation, SIAM 2008. +// ISBN 978-0-898716-46-7. +// +// The input is a tensor of shape `[..., M, M]` whose inner-most 2 dimensions +// form square matrices. 
The output is a tensor of the same shape as the input +// containing the exponential for all input submatrices `[..., :, :]`. // // Arguments: -// input: A complex tensor. +// input: Shape is `[..., M, M]`. // -// Returns A complex tensor of the same shape as `input`. The inner-most -// dimension of `input` is replaced with its 1D Fourier transform. +// Returns Shape is `[..., M, M]`. // -// @compatibility(numpy) -// Equivalent to np.fft.fft +// @compatibility(scipy) +// Equivalent to scipy.linalg.logm // @end_compatibility -func FFT(scope *Scope, input tf.Output) (output tf.Output) { +func MatrixLogarithm(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "FFT", + Type: "MatrixLogarithm", Input: []tf.Input{ input, }, @@ -34412,294 +37186,324 @@ func FFT(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } -// Identity transformation that models performance. -// -// Identity transformation that models performance. +// This op is used as a placeholder in If branch functions. It doesn't provide a +// valid output when run, so must either be removed (e.g. replaced with a +// function input) or guaranteed not to be used (e.g. if mirroring an +// intermediate output needed for the gradient computation of the other branch). // // Arguments: -// input_dataset: A variant tensor representing the input dataset. -// +// dtype: The type of the output. +// shape: The purported shape of the output. This is only used for shape inference; +// the output will not necessarily have this shape. Can be a partial shape. // -func ModelDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Returns \"Fake\" output value. This should not be consumed by another op. 
+func FakeParam(scope *Scope, dtype tf.DataType, shape tf.Shape) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"dtype": dtype, "shape": shape} opspec := tf.OpSpec{ - Type: "ModelDataset", - Input: []tf.Input{ - input_dataset, - }, + Type: "FakeParam", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns the truth value of (x > y) element-wise. +// Returns the next representable value of `x1` in the direction of `x2`, element-wise. // -// *NOTE*: `Greater` supports broadcasting. More about broadcasting -// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) -func Greater(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { +// This operation returns the same result as the C++ std::nextafter function. +// +// It can also return a subnormal number. +// +// @compatibility(cpp) +// Equivalent to C++ std::nextafter function. +// @end_compatibility +func NextAfter(scope *Scope, x1 tf.Output, x2 tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "Greater", + Type: "NextAfter", Input: []tf.Input{ - x, y, + x1, x2, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Applies sparse addition to `input` using individual values or slices -// -// from `updates` according to indices `indices`. The updates are non-aliasing: -// `input` is only modified in-place if no other operations will use it. -// Otherwise, a copy of `input` is made. This operation has a gradient with -// respect to both `input` and `updates`. -// -// `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `input`. -// It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. +// OrderedMapStageAttr is an optional argument to OrderedMapStage. 
+type OrderedMapStageAttr func(optionalAttr) + +// OrderedMapStageCapacity sets the optional capacity attribute to value. // -// The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or `(P-K)`-dimensional slices -// (if `K < P`) along the `K`th dimension of `input`. +// value: Maximum number of elements in the Staging Area. If > 0, inserts +// on the container will block when the capacity is reached. +// If not specified, defaults to 0 // -// `updates` is `Tensor` of rank `Q-1+P-K` with shape: +// REQUIRES: value >= 0 +func OrderedMapStageCapacity(value int64) OrderedMapStageAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// $$[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].$$ +// REQUIRES: value >= 0 +func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// OrderedMapStageContainer sets the optional container attribute to value. // -// For example, say we want to add 4 scattered elements to a rank-1 tensor to 8 -// elements. In Python, that addition would look like this: +// value: If non-empty, this queue is placed in the given container. Otherwise, +// a default container is used. +// If not specified, defaults to "" +func OrderedMapStageContainer(value string) OrderedMapStageAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// OrderedMapStageSharedName sets the optional shared_name attribute to value. // -// input = tf.constant([1, 2, 3, 4, 5, 6, 7, 8]) -// indices = tf.constant([[4], [3], [1], [7]]) -// updates = tf.constant([9, 10, 11, 12]) -// output = tf.scatter_nd_non_aliasing_add(input, indices, updates) -// with tf.Session() as sess: -// print(sess.run(output)) +// value: It is necessary to match this name to the matching Unstage Op. 
+// If not specified, defaults to "" +func OrderedMapStageSharedName(value string) OrderedMapStageAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Stage (key, values) in the underlying container which behaves like a ordered // -// The resulting value `output` would look like this: +// associative container. Elements are ordered by key. // -// [1, 13, 3, 14, 14, 6, 7, 20] +// Arguments: +// key: int64 // -// See `tf.scatter_nd` for more details about how to make updates to slices. +// values: a list of tensors +// dtypes A list of data types that inserted values should adhere to. // -// Arguments: -// input: A Tensor. -// indices: A Tensor. Must be one of the following types: `int32`, `int64`. -// A tensor of indices into `input`. -// updates: A Tensor. Must have the same type as ref. A tensor of updated values -// to add to `input`. // -// Returns A `Tensor` with the same shape as `input`, containing values of `input` -// updated with `updates`. -func ScatterNdNonAliasingAdd(scope *Scope, input tf.Output, indices tf.Output, updates tf.Output) (output tf.Output) { +// Returns the created operation. +func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } opspec := tf.OpSpec{ - Type: "ScatterNdNonAliasingAdd", + Type: "OrderedMapStage", Input: []tf.Input{ - input, indices, updates, + key, indices, tf.OutputList(values), }, + Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// FractionalMaxPoolAttr is an optional argument to FractionalMaxPool. -type FractionalMaxPoolAttr func(optionalAttr) +// StackPushV2Attr is an optional argument to StackPushV2. 
+type StackPushV2Attr func(optionalAttr) -// FractionalMaxPoolPseudoRandom sets the optional pseudo_random attribute to value. +// StackPushV2SwapMemory sets the optional swap_memory attribute to value. // -// value: When set to True, generates the pooling sequence in a -// pseudorandom fashion, otherwise, in a random fashion. Check paper [Benjamin -// Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) for -// difference between pseudorandom and random. +// value: Swap `elem` to CPU. Default to false. // If not specified, defaults to false -func FractionalMaxPoolPseudoRandom(value bool) FractionalMaxPoolAttr { +func StackPushV2SwapMemory(value bool) StackPushV2Attr { return func(m optionalAttr) { - m["pseudo_random"] = value + m["swap_memory"] = value } } -// FractionalMaxPoolOverlapping sets the optional overlapping attribute to value. -// -// value: When set to True, it means when pooling, the values at the boundary -// of adjacent pooling cells are used by both cells. For example: -// -// `index 0 1 2 3 4` +// Push an element onto the stack. // -// `value 20 5 16 3 7` +// Arguments: +// handle: The handle to a stack. +// elem: The tensor to be pushed onto the stack. // -// If the pooling sequence is [0, 2, 4], then 16, at index 2 will be used twice. -// The result would be [20, 16] for fractional max pooling. -// If not specified, defaults to false -func FractionalMaxPoolOverlapping(value bool) FractionalMaxPoolAttr { - return func(m optionalAttr) { - m["overlapping"] = value +// Returns The same tensor as the input 'elem'. 
+func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...StackPushV2Attr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StackPushV2", + Input: []tf.Input{ + handle, elem, + }, + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// FractionalMaxPoolDeterministic sets the optional deterministic attribute to value. +// RpcAttr is an optional argument to Rpc. +type RpcAttr func(optionalAttr) + +// RpcProtocol sets the optional protocol attribute to value. // -// value: When set to True, a fixed pooling region will be used when -// iterating over a FractionalMaxPool node in the computation graph. Mainly used -// in unit test to make FractionalMaxPool deterministic. -// If not specified, defaults to false -func FractionalMaxPoolDeterministic(value bool) FractionalMaxPoolAttr { +// value: RPC protocol to use. Empty string means use the default protocol. +// Options include 'grpc'. +// If not specified, defaults to "" +func RpcProtocol(value string) RpcAttr { return func(m optionalAttr) { - m["deterministic"] = value + m["protocol"] = value } } -// FractionalMaxPoolSeed sets the optional seed attribute to value. +// RpcFailFast sets the optional fail_fast attribute to value. // -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func FractionalMaxPoolSeed(value int64) FractionalMaxPoolAttr { +// value: `boolean`. If `true` (default), then failures to connect +// (i.e., the server does not immediately respond) cause an RPC failure. 
+// If not specified, defaults to true +func RpcFailFast(value bool) RpcAttr { return func(m optionalAttr) { - m["seed"] = value + m["fail_fast"] = value } } -// FractionalMaxPoolSeed2 sets the optional seed2 attribute to value. +// RpcTimeoutInMs sets the optional timeout_in_ms attribute to value. // -// value: An second seed to avoid seed collision. +// value: `int`. If `0` (default), then the kernel will run the RPC +// request and only time out if the RPC deadline passes or the session times out. +// If this value is greater than `0`, then the op will raise an exception if +// the RPC takes longer than `timeout_in_ms`. // If not specified, defaults to 0 -func FractionalMaxPoolSeed2(value int64) FractionalMaxPoolAttr { +func RpcTimeoutInMs(value int64) RpcAttr { return func(m optionalAttr) { - m["seed2"] = value + m["timeout_in_ms"] = value } } -// Performs fractional max pooling on the input. +// Perform batches of RPC requests. // -// Fractional max pooling is slightly different than regular max pooling. In -// regular max pooling, you downsize an input set by taking the maximum value of -// smaller N x N subsections of the set (often 2x2), and try to reduce the set by -// a factor of N, where N is an integer. Fractional max pooling, as you might -// expect from the word "fractional", means that the overall reduction ratio N -// does not have to be an integer. +// This op asynchronously performs either a single RPC request, or a batch +// of requests. RPC requests are defined by three main parameters: // -// The sizes of the pooling regions are generated randomly but are fairly uniform. -// For example, let's look at the height dimension, and the constraints on the -// list of rows that will be pool boundaries. +// - `address` (the host+port or BNS address of the request) +// - `method` (the RPC method name for the request) +// - `request` (the serialized proto string, or vector of strings, +// of the RPC request argument). 
// -// First we define the following: +// For example, if you have an RPC service running on port localhost:2345, +// and its interface is configured with the following proto declaration: // -// 1. input_row_length : the number of rows from the input set -// 2. output_row_length : which will be smaller than the input -// 3. alpha = input_row_length / output_row_length : our reduction ratio -// 4. K = floor(alpha) -// 5. row_pooling_sequence : this is the result list of pool boundary rows +// ``` +// service MyService { +// rpc MyMethod(MyRequestProto) returns (MyResponseProto) { +// } +// }; +// ``` // -// Then, row_pooling_sequence should satisfy: +// then call this op with arguments: // -// 1. a[0] = 0 : the first value of the sequence is 0 -// 2. a[end] = input_row_length : the last value of the sequence is the size -// 3. K <= (a[i+1] - a[i]) <= K+1 : all intervals are K or K+1 size -// 4. length(row_pooling_sequence) = output_row_length+1 +// ``` +// address = "localhost:2345" +// method = "MyService/MyMethod" +// ``` // -// For more details on fractional max pooling, see this paper: -// [Benjamin Graham, Fractional Max-Pooling](http://arxiv.org/abs/1412.6071) +// The `request` tensor is a string tensor representing serialized `MyRequestProto` +// strings; and the output string tensor `response` will have the same shape +// and contain (upon successful completion) corresponding serialized +// `MyResponseProto` strings. +// +// For example, to send a single, empty, `MyRequestProto`, call +// this op with `request = ""`. To send 5 **parallel** empty requests, +// call this op with `request = ["", "", "", "", ""]`. +// +// More generally, one can create a batch of `MyRequestProto` serialized protos +// from regular batched tensors using the `encode_proto` op, and convert +// the response `MyResponseProto` serialized protos to batched tensors +// using the `decode_proto` op. 
+// +// **NOTE** Working with serialized proto strings is faster than instantiating +// actual proto objects in memory, so no performance degradation is expected +// compared to writing custom kernels for this workflow. +// +// If the connection fails or the remote worker returns an error +// status, the op reraises this exception locally. +// +// See the `TryRpc` op if you prefer to handle RPC failures manually in the graph. // // Arguments: -// value: 4-D with shape `[batch, height, width, channels]`. -// pooling_ratio: Pooling ratio for each dimension of `value`, currently only -// supports row and col dimension and should be >= 1.0. For example, a valid -// pooling ratio looks like [1.0, 1.44, 1.73, 1.0]. The first and last elements -// must be 1.0 because we don't allow pooling on batch and channels -// dimensions. 1.44 and 1.73 are pooling ratio on height and width dimensions -// respectively. +// address: `0-D` or `1-D`. The address (i.e. host_name:port) of the RPC server. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `method` and `request`. +// method: `0-D` or `1-D`. The method address on the RPC server. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `address` and `request`. +// request: `0-D` or `1-D`. Serialized proto strings: the rpc request argument. +// If this tensor has more than 1 element, then multiple parallel rpc requests +// are sent. This argument broadcasts with `address` and `method`. // -// Returns output tensor after fractional max pooling.row pooling sequence, needed to calculate gradient.column pooling sequence, needed to calculate gradient. -func FractionalMaxPool(scope *Scope, value tf.Output, pooling_ratio []float32, optional ...FractionalMaxPoolAttr) (output tf.Output, row_pooling_sequence tf.Output, col_pooling_sequence tf.Output) { +// Returns Same shape as `request`. 
Serialized proto strings: the rpc responses. +func Rpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...RpcAttr) (response tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"pooling_ratio": pooling_ratio} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "FractionalMaxPool", + Type: "Rpc", Input: []tf.Input{ - value, + address, method, request, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) + return op.Output(0) } -// Creates a MultiDeviceIterator resource. -// -// Arguments: -// devices: A list of devices the iterator works across. -// shared_name: If non-empty, this resource will be shared under the given name -// across multiple sessions. -// container: If non-empty, this resource is placed in the given container. -// Otherwise, a default container is used. -// output_types: The type list for the return values. -// output_shapes: The list of shapes being produced. -// -// Returns Handle to the resource created. -func MultiDeviceIterator(scope *Scope, devices []string, shared_name string, container string, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Records the bytes size of each element of `input_dataset` in a StatsAggregator. 
+func ExperimentalBytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"devices": devices, "shared_name": shared_name, "container": container, "output_types": output_types, "output_shapes": output_shapes} + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "MultiDeviceIterator", - + Type: "ExperimentalBytesProducedStatsDataset", + Input: []tf.Input{ + input_dataset, tag, + }, Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Performs a padding as a preprocess during a convolution. +// A substitute for `InterleaveDataset` on a fixed list of `N` datasets. // -// Similar to FusedResizeAndPadConv2d, this op allows for an optimized -// implementation where the spatial padding transformation stage is fused with the -// im2col lookup, but in this case without the bilinear filtering required for -// resizing. Fusing the padding prevents the need to write out the intermediate -// results as whole tensors, reducing memory pressure, and we can get some latency -// gains by merging the transformation calculations. -// The data_format attribute for Conv2D isn't supported by this op, and 'NHWC' -// order is used instead. -// Internally this op uses a single per-graph scratch buffer, which means that it -// will block if multiple versions are being run in parallel. This is because this -// operator is primarily an optimization to minimize memory usage. +// Arguments: +// selector_input_dataset: A dataset of scalar `DT_INT64` elements that determines which of the +// `N` data inputs should produce the next output element. +// data_input_datasets: `N` datasets with the same type that will be interleaved according to +// the values of `selector_input_dataset`. 
// -// Arguments: -// input: 4-D with shape `[batch, in_height, in_width, in_channels]`. -// paddings: A two-column matrix specifying the padding sizes. The number of -// rows must be the same as the rank of `input`. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. // -// strides: 1-D of length 4. The stride of the sliding window for each dimension -// of `input`. Must be in the same order as the dimension specified with format. -// padding: The type of padding algorithm to use. -func FusedPadConv2D(scope *Scope, input tf.Output, paddings tf.Output, filter tf.Output, mode string, strides []int64, padding string) (output tf.Output) { +func ExperimentalDirectedInterleaveDataset(scope *Scope, selector_input_dataset tf.Output, data_input_datasets []tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"mode": mode, "strides": strides, "padding": padding} + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "FusedPadConv2D", + Type: "ExperimentalDirectedInterleaveDataset", Input: []tf.Input{ - input, paddings, filter, + selector_input_dataset, tf.OutputList(data_input_datasets), }, Attrs: attrs, } @@ -34707,86 +37511,59 @@ func FusedPadConv2D(scope *Scope, input tf.Output, paddings tf.Output, filter tf return op.Output(0) } -// Conv2DBackpropInputAttr is an optional argument to Conv2DBackpropInput. -type Conv2DBackpropInputAttr func(optionalAttr) - -// Conv2DBackpropInputUseCudnnOnGpu sets the optional use_cudnn_on_gpu attribute to value. -// If not specified, defaults to true -func Conv2DBackpropInputUseCudnnOnGpu(value bool) Conv2DBackpropInputAttr { - return func(m optionalAttr) { - m["use_cudnn_on_gpu"] = value - } -} +// RandomUniformIntAttr is an optional argument to RandomUniformInt. 
+type RandomUniformIntAttr func(optionalAttr) -// Conv2DBackpropInputExplicitPaddings sets the optional explicit_paddings attribute to value. +// RandomUniformIntSeed sets the optional seed attribute to value. // -// value: If `padding` is `"EXPLICIT"`, the list of explicit padding amounts. For the ith -// dimension, the amount of padding inserted before and after the dimension is -// `explicit_paddings[2 * i]` and `explicit_paddings[2 * i + 1]`, respectively. If -// `padding` is not `"EXPLICIT"`, `explicit_paddings` must be empty. -// If not specified, defaults to <> -func Conv2DBackpropInputExplicitPaddings(value []int64) Conv2DBackpropInputAttr { +// value: If either `seed` or `seed2` are set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func RandomUniformIntSeed(value int64) RandomUniformIntAttr { return func(m optionalAttr) { - m["explicit_paddings"] = value + m["seed"] = value } } -// Conv2DBackpropInputDataFormat sets the optional data_format attribute to value. +// RandomUniformIntSeed2 sets the optional seed2 attribute to value. // -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func Conv2DBackpropInputDataFormat(value string) Conv2DBackpropInputAttr { +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomUniformIntSeed2(value int64) RandomUniformIntAttr { return func(m optionalAttr) { - m["data_format"] = value + m["seed2"] = value } } -// Conv2DBackpropInputDilations sets the optional dilations attribute to value. +// Outputs random integers from a uniform distribution. 
// -// value: 1-D tensor of length 4. The dilation factor for each dimension of -// `input`. If set to k > 1, there will be k-1 skipped cells between each filter -// element on that dimension. The dimension order is determined by the value of -// `data_format`, see above for details. Dilations in the batch and depth -// dimensions must be 1. -// If not specified, defaults to -func Conv2DBackpropInputDilations(value []int64) Conv2DBackpropInputAttr { - return func(m optionalAttr) { - m["dilations"] = value - } -} - -// Computes the gradients of convolution with respect to the input. +// The generated values are uniform integers in the range `[minval, maxval)`. +// The lower bound `minval` is included in the range, while the upper bound +// `maxval` is excluded. +// +// The random integers are slightly biased unless `maxval - minval` is an exact +// power of two. The bias is small for values of `maxval - minval` significantly +// smaller than the range of the output (either `2^32` or `2^64`). // // Arguments: -// input_sizes: An integer vector representing the shape of `input`, -// where `input` is a 4-D `[batch, height, width, channels]` tensor. -// filter: 4-D with shape -// `[filter_height, filter_width, in_channels, out_channels]`. -// out_backprop: 4-D with shape `[batch, out_height, out_width, out_channels]`. -// Gradients w.r.t. the output of the convolution. -// strides: The stride of the sliding window for each dimension of the input -// of the convolution. Must be in the same order as the dimension specified with -// format. -// padding: The type of padding algorithm to use. +// shape: The shape of the output tensor. +// minval: 0-D. Inclusive lower bound on the generated integers. +// maxval: 0-D. Exclusive upper bound on the generated integers. // -// Returns 4-D with shape `[batch, in_height, in_width, in_channels]`. Gradient -// w.r.t. the input of the convolution. 
-func Conv2DBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string, optional ...Conv2DBackpropInputAttr) (output tf.Output) { +// Returns A tensor of the specified shape filled with uniform random integers. +func RandomUniformInt(scope *Scope, shape tf.Output, minval tf.Output, maxval tf.Output, optional ...RandomUniformIntAttr) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "Conv2DBackpropInput", + Type: "RandomUniformInt", Input: []tf.Input{ - input_sizes, filter, out_backprop, + shape, minval, maxval, }, Attrs: attrs, } @@ -34794,869 +37571,732 @@ func Conv2DBackpropInput(scope *Scope, input_sizes tf.Output, filter tf.Output, return op.Output(0) } -// Interleave the values from the `data` tensors into a single tensor. -// -// Builds a merged tensor such that -// -// ```python -// merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...] -// ``` -// -// For example, if each `indices[m]` is scalar or vector, we have -// -// ```python -// # Scalar indices: -// merged[indices[m], ...] = data[m][...] -// -// # Vector indices: -// merged[indices[m][i], ...] = data[m][i, ...] -// ``` -// -// Each `data[i].shape` must start with the corresponding `indices[i].shape`, -// and the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we -// must have `data[i].shape = indices[i].shape + constant`. In terms of this -// `constant`, the output shape is -// -// merged.shape = [max(indices)] + constant +// Add the quantile summaries to each quantile stream resource. // -// Values are merged in order, so if an index appears in both `indices[m][i]` and -// `indices[n][j]` for `(m,i) < (n,j)` the slice `data[n][j]` will appear in the -// merged result. 
If you do not need this guarantee, ParallelDynamicStitch might -// perform better on some devices. +// An op that adds a list of quantile summaries to a quantile stream resource. Each +// summary Tensor is rank 2, containing summaries (value, weight, min_rank, max_rank) +// for a single feature. // -// For example: +// Arguments: +// quantile_stream_resource_handle: resource handle referring to a QuantileStreamResource. +// summaries: string; List of Rank 2 Tensor each containing the summaries for a single feature. // -// ```python -// indices[0] = 6 -// indices[1] = [4, 1] -// indices[2] = [[5, 2], [0, 3]] -// data[0] = [61, 62] -// data[1] = [[41, 42], [11, 12]] -// data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]] -// merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42], -// [51, 52], [61, 62]] -// ``` +// Returns the created operation. +func BoostedTreesQuantileStreamResourceAddSummaries(scope *Scope, quantile_stream_resource_handle tf.Output, summaries []tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "BoostedTreesQuantileStreamResourceAddSummaries", + Input: []tf.Input{ + quantile_stream_resource_handle, tf.OutputList(summaries), + }, + } + return scope.AddOperation(opspec) +} + +// Creates a Dataset that returns pseudorandom numbers. // -// This method can be used to merge partitions created by `dynamic_partition` -// as illustrated on the following example: +// Arguments: +// seed: A scalar seed for the random number generator. If either seed or +// seed2 is set to be non-zero, the random number generator is seeded +// by the given seed. Otherwise, a random seed is used. +// seed2: A second scalar seed to avoid seed collision. // -// ```python -// # Apply function (increments x_i) on elements for which a certain condition -// # apply (x_i != -1 in this example). 
-// x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4]) -// condition_mask=tf.not_equal(x,tf.constant(-1.)) -// partitioned_data = tf.dynamic_partition( -// x, tf.cast(condition_mask, tf.int32) , 2) -// partitioned_data[1] = partitioned_data[1] + 1.0 -// condition_indices = tf.dynamic_partition( -// tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2) -// x = tf.dynamic_stitch(condition_indices, partitioned_data) -// # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain -// # unchanged. -// ``` // -//
-// -//
-func DynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) { +func ExperimentalRandomDataset(scope *Scope, seed tf.Output, seed2 tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "DynamicStitch", + Type: "ExperimentalRandomDataset", Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(data), + seed, seed2, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Interleave the values from the `data` tensors into a single tensor. -// -// Builds a merged tensor such that -// -// ```python -// merged[indices[m][i, ..., j], ...] = data[m][i, ..., j, ...] -// ``` -// -// For example, if each `indices[m]` is scalar or vector, we have -// -// ```python -// # Scalar indices: -// merged[indices[m], ...] = data[m][...] -// -// # Vector indices: -// merged[indices[m][i], ...] = data[m][i, ...] -// ``` -// -// Each `data[i].shape` must start with the corresponding `indices[i].shape`, -// and the rest of `data[i].shape` must be constant w.r.t. `i`. That is, we -// must have `data[i].shape = indices[i].shape + constant`. In terms of this -// `constant`, the output shape is -// -// merged.shape = [max(indices)] + constant -// -// Values may be merged in parallel, so if an index appears in both `indices[m][i]` -// and `indices[n][j]`, the result may be invalid. This differs from the normal -// DynamicStitch operator that defines the behavior in that case. -// -// For example: +// Creates a dataset that overrides the maximum intra-op parallelism. 
// -// ```python -// indices[0] = 6 -// indices[1] = [4, 1] -// indices[2] = [[5, 2], [0, 3]] -// data[0] = [61, 62] -// data[1] = [[41, 42], [11, 12]] -// data[2] = [[[51, 52], [21, 22]], [[1, 2], [31, 32]]] -// merged = [[1, 2], [11, 12], [21, 22], [31, 32], [41, 42], -// [51, 52], [61, 62]] -// ``` +// Arguments: // -// This method can be used to merge partitions created by `dynamic_partition` -// as illustrated on the following example: +// max_intra_op_parallelism: Identifies the maximum intra-op parallelism to use. // -// ```python -// # Apply function (increments x_i) on elements for which a certain condition -// # apply (x_i != -1 in this example). -// x=tf.constant([0.1, -1., 5.2, 4.3, -1., 7.4]) -// condition_mask=tf.not_equal(x,tf.constant(-1.)) -// partitioned_data = tf.dynamic_partition( -// x, tf.cast(condition_mask, tf.int32) , 2) -// partitioned_data[1] = partitioned_data[1] + 1.0 -// condition_indices = tf.dynamic_partition( -// tf.range(tf.shape(x)[0]), tf.cast(condition_mask, tf.int32) , 2) -// x = tf.dynamic_stitch(condition_indices, partitioned_data) -// # Here x=[1.1, -1., 6.2, 5.3, -1, 8.4], the -1. values remain -// # unchanged. -// ``` // -//
-// -//
-func ParallelDynamicStitch(scope *Scope, indices []tf.Output, data []tf.Output) (merged tf.Output) { +func ExperimentalMaxIntraOpParallelismDataset(scope *Scope, input_dataset tf.Output, max_intra_op_parallelism tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ParallelDynamicStitch", + Type: "ExperimentalMaxIntraOpParallelismDataset", Input: []tf.Input{ - tf.OutputList(indices), tf.OutputList(data), + input_dataset, max_intra_op_parallelism, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// PriorityQueueV2Attr is an optional argument to PriorityQueueV2. -type PriorityQueueV2Attr func(optionalAttr) - -// PriorityQueueV2ComponentTypes sets the optional component_types attribute to value. -// -// value: The type of each component in a value. -// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func PriorityQueueV2ComponentTypes(value []tf.DataType) PriorityQueueV2Attr { - return func(m optionalAttr) { - m["component_types"] = value - } -} +// StringSplitV2Attr is an optional argument to StringSplitV2. +type StringSplitV2Attr func(optionalAttr) -// PriorityQueueV2Capacity sets the optional capacity attribute to value. +// StringSplitV2Maxsplit sets the optional maxsplit attribute to value. // -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. +// value: An `int`. If `maxsplit > 0`, limit of the split of the result. // If not specified, defaults to -1 -func PriorityQueueV2Capacity(value int64) PriorityQueueV2Attr { +func StringSplitV2Maxsplit(value int64) StringSplitV2Attr { return func(m optionalAttr) { - m["capacity"] = value + m["maxsplit"] = value } } -// PriorityQueueV2Container sets the optional container attribute to value. 
+// Split elements of `source` based on `sep` into a `SparseTensor`. // -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func PriorityQueueV2Container(value string) PriorityQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// PriorityQueueV2SharedName sets the optional shared_name attribute to value. +// Let N be the size of source (typically N will be the batch size). Split each +// element of `source` based on `sep` and return a `SparseTensor` +// containing the split tokens. Empty tokens are ignored. // -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func PriorityQueueV2SharedName(value string) PriorityQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that produces elements sorted by the first component value. +// For example, N = 2, source[0] is 'hello world' and source[1] is 'a b c', +// then the output will be +// ``` +// st.indices = [0, 0; +// 0, 1; +// 1, 0; +// 1, 1; +// 1, 2] +// st.shape = [2, 3] +// st.values = ['hello', 'world', 'a', 'b', 'c'] +// ``` // -// Note that the PriorityQueue requires the first component of any element -// to be a scalar int64, in addition to the other elements declared by -// component_types. Therefore calls to Enqueue and EnqueueMany (resp. Dequeue -// and DequeueMany) on a PriorityQueue will all require (resp. output) one extra -// entry in their input (resp. output) lists. +// If `sep` is given, consecutive delimiters are not grouped together and are +// deemed to delimit empty strings. For example, source of `"1<>2<><>3"` and +// sep of `"<>"` returns `["1", "2", "", "3"]`. 
If `sep` is None or an empty +// string, consecutive whitespace are regarded as a single separator, and the +// result will contain no empty strings at the startor end if the string has +// leading or trailing whitespace. // -// Arguments: -// shapes: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. If the length of -// this attr is 0, the shapes of queue elements are not constrained, and -// only one element may be dequeued at a time. +// Note that the above mentioned behavior matches python's str.split. // -// Returns The handle to the queue. -func PriorityQueueV2(scope *Scope, shapes []tf.Shape, optional ...PriorityQueueV2Attr) (handle tf.Output) { +// Arguments: +// input: `1-D` string `Tensor`, the strings to split. +// sep: `0-D` string `Tensor`, the delimiter character. +func StringSplitV2(scope *Scope, input tf.Output, sep tf.Output, optional ...StringSplitV2Attr) (indices tf.Output, values tf.Output, shape tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"shapes": shapes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "PriorityQueueV2", - + Type: "StringSplitV2", + Input: []tf.Input{ + input, sep, + }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// QueueEnqueueV2Attr is an optional argument to QueueEnqueueV2. -type QueueEnqueueV2Attr func(optionalAttr) - -// QueueEnqueueV2TimeoutMs sets the optional timeout_ms attribute to value. -// -// value: If the queue is full, this operation will block for up to -// timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueEnqueueV2TimeoutMs(value int64) QueueEnqueueV2Attr { - return func(m optionalAttr) { - m["timeout_ms"] = value - } + return op.Output(0), op.Output(1), op.Output(2) } -// Enqueues a tuple of one or more tensors in the given queue. 
+// Creates a dataset that uses a custom thread pool to compute `input_dataset`. // -// The components input has k elements, which correspond to the components of -// tuples stored in the given queue. +// Arguments: // -// N.B. If the queue is full, this operation will block until the given -// element has been enqueued (or 'timeout_ms' elapses, if specified). +// thread_pool: A resource produced by the ThreadPoolHandle op. // -// Arguments: -// handle: The handle to a queue. -// components: One or more tensors from which the enqueued tensors should be taken. // -// Returns the created operation. -func QueueEnqueueV2(scope *Scope, handle tf.Output, components []tf.Output, optional ...QueueEnqueueV2Attr) (o *tf.Operation) { +func ExperimentalThreadPoolDataset(scope *Scope, input_dataset tf.Output, thread_pool tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "QueueEnqueueV2", + Type: "ExperimentalThreadPoolDataset", Input: []tf.Input{ - handle, tf.OutputList(components), + input_dataset, thread_pool, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes the Bessel i0e function of `x` element-wise. -// -// Exponentially scaled modified Bessel function of order 0 defined as -// `bessel_i0e(x) = exp(-abs(x)) bessel_i0(x)`. -// -// This function is faster and numerically stabler than `bessel_i0(x)`. -func BesselI0e(scope *Scope, x tf.Output) (y tf.Output) { +// Computes softsign: `features / (abs(features) + 1)`. 
+func Softsign(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "BesselI0e", + Type: "Softsign", Input: []tf.Input{ - x, + features, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// QueueDequeueManyV2Attr is an optional argument to QueueDequeueManyV2. -type QueueDequeueManyV2Attr func(optionalAttr) +// EncodeProtoAttr is an optional argument to EncodeProto. +type EncodeProtoAttr func(optionalAttr) -// QueueDequeueManyV2TimeoutMs sets the optional timeout_ms attribute to value. -// -// value: If the queue has fewer than n elements, this operation -// will block for up to timeout_ms milliseconds. -// Note: This option is not supported yet. -// If not specified, defaults to -1 -func QueueDequeueManyV2TimeoutMs(value int64) QueueDequeueManyV2Attr { +// EncodeProtoDescriptorSource sets the optional descriptor_source attribute to value. +// If not specified, defaults to "local://" +func EncodeProtoDescriptorSource(value string) EncodeProtoAttr { return func(m optionalAttr) { - m["timeout_ms"] = value + m["descriptor_source"] = value } } -// Dequeues `n` tuples of one or more tensors from the given queue. +// The op serializes protobuf messages provided in the input tensors. // -// If the queue is closed and there are fewer than `n` elements, then an -// OutOfRange error is returned. +// The types of the tensors in `values` must match the schema for the +// fields specified in `field_names`. All the tensors in `values` must +// have a common shape prefix, *batch_shape*. // -// This operation concatenates queue-element component tensors along the -// 0th dimension to make a single component tensor. All of the components -// in the dequeued tuple will have size `n` in the 0th dimension. +// The `sizes` tensor specifies repeat counts for each field. 
The repeat +// count (last dimension) of a each tensor in `values` must be greater +// than or equal to corresponding repeat count in `sizes`. // -// This operation has `k` outputs, where `k` is the number of components in -// the tuples stored in the given queue, and output `i` is the ith -// component of the dequeued tuple. +// A `message_type` name must be provided to give context for the field +// names. The actual message descriptor can be looked up either in the +// linked-in descriptor pool or a filename provided by the caller using +// the `descriptor_source` attribute. // -// N.B. If the queue is empty, this operation will block until `n` elements -// have been dequeued (or 'timeout_ms' elapses, if specified). +// The `descriptor_source` attribute selects a source of protocol +// descriptors to consult when looking up `message_type`. This may be a +// filename containing a serialized `FileDescriptorSet` message, +// or the special value `local://`, in which case only descriptors linked +// into the code will be searched; the filename can be on any filesystem +// accessible to TensorFlow. +// +// You can build a `descriptor_source` file using the `--descriptor_set_out` +// and `--include_imports` options to the protocol compiler `protoc`. +// +// The `local://` database only covers descriptors linked into the +// code via C++ libraries, not Python imports. You can link in a proto descriptor +// by creating a cc_library target with alwayslink=1. +// +// There are a few special cases in the value mapping: +// +// Submessage and group fields must be pre-serialized as TensorFlow strings. +// +// TensorFlow lacks support for unsigned int64s, so they must be +// represented as `tf.int64` with the same twos-complement bit pattern +// (the obvious way). +// +// Unsigned int32 values can be represented exactly with `tf.int64`, or +// with sign wrapping if the input is of type `tf.int32`. 
+// +// Arguments: +// sizes: Tensor of int32 with shape `[batch_shape, len(field_names)]`. +// values: List of tensors containing values for the corresponding field. +// field_names: List of strings containing proto field names. +// message_type: Name of the proto message type to decode. +// +// Returns Tensor of serialized protos with shape `batch_shape`. +func EncodeProto(scope *Scope, sizes tf.Output, values []tf.Output, field_names []string, message_type string, optional ...EncodeProtoAttr) (bytes tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"field_names": field_names, "message_type": message_type} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "EncodeProto", + Input: []tf.Input{ + sizes, tf.OutputList(values), + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that splits a SparseTensor into elements row-wise. +func SparseTensorSliceDataset(scope *Scope, indices tf.Output, values tf.Output, dense_shape tf.Output) (handle tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseTensorSliceDataset", + Input: []tf.Input{ + indices, values, dense_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns x / y element-wise for real types. // -// Arguments: -// handle: The handle to a queue. -// n: The number of tuples to dequeue. -// component_types: The type of each component in a tuple. +// If `x` and `y` are reals, this will return the floating-point division. // -// Returns One or more tensors that were dequeued as a tuple. -func QueueDequeueManyV2(scope *Scope, handle tf.Output, n tf.Output, component_types []tf.DataType, optional ...QueueDequeueManyV2Attr) (components []tf.Output) { +// *NOTE*: `Div` supports broadcasting. 
More about broadcasting +// [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html) +func RealDiv(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "QueueDequeueManyV2", + Type: "RealDiv", Input: []tf.Input{ - handle, n, + x, y, }, - Attrs: attrs, } op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Creates a dataset that concatenates `input_dataset` with `another_dataset`. +func ConcatenateDataset(scope *Scope, input_dataset tf.Output, another_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - var idx int - var err error - if components, idx, err = makeOutputList(op, idx, "components"); err != nil { - scope.UpdateErr("QueueDequeueManyV2", err) - return + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "ConcatenateDataset", + Input: []tf.Input{ + input_dataset, another_dataset, + }, + Attrs: attrs, } - return components + op := scope.AddOperation(opspec) + return op.Output(0) } -// Forwards the value of an available tensor from `inputs` to `output`. +// Computes the grayscale dilation of 4-D `input` and 3-D `filter` tensors. // -// `Merge` waits for at least one of the tensors in `inputs` to become available. -// It is usually combined with `Switch` to implement branching. +// The `input` tensor has shape `[batch, in_height, in_width, depth]` and the +// `filter` tensor has shape `[filter_height, filter_width, depth]`, i.e., each +// input channel is processed independently of the others with its own structuring +// function. The `output` tensor has shape +// `[batch, out_height, out_width, depth]`. The spatial dimensions of the output +// tensor depend on the `padding` algorithm. 
We currently only support the default +// "NHWC" `data_format`. // -// `Merge` forwards the first tensor to become available to `output`, and sets -// `value_index` to its index in `inputs`. +// In detail, the grayscale morphological 2-D dilation is the max-sum correlation +// (for consistency with `conv2d`, we use unmirrored filters): +// +// output[b, y, x, c] = +// max_{dy, dx} input[b, +// strides[1] * y + rates[1] * dy, +// strides[2] * x + rates[2] * dx, +// c] + +// filter[dy, dx, c] +// +// Max-pooling is a special case when the filter has size equal to the pooling +// kernel size and contains all zeros. +// +// Note on duality: The dilation of `input` by the `filter` is equal to the +// negation of the erosion of `-input` by the reflected `filter`. // // Arguments: -// inputs: The input tensors, exactly one of which will become available. +// input: 4-D with shape `[batch, in_height, in_width, depth]`. +// filter: 3-D with shape `[filter_height, filter_width, depth]`. +// strides: The stride of the sliding window for each dimension of the input +// tensor. Must be: `[1, stride_height, stride_width, 1]`. +// rates: The input stride for atrous morphological dilation. Must be: +// `[1, rate_height, rate_width, 1]`. +// padding: The type of padding algorithm to use. // -// Returns Will be set to the available input tensor.The index of the chosen input tensor in `inputs`. -func Merge(scope *Scope, inputs []tf.Output) (output tf.Output, value_index tf.Output) { +// Returns 4-D with shape `[batch, out_height, out_width, depth]`. 
+func Dilation2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64, rates []int64, padding string) (output tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"strides": strides, "rates": rates, "padding": padding} opspec := tf.OpSpec{ - Type: "Merge", + Type: "Dilation2D", Input: []tf.Input{ - tf.OutputList(inputs), + input, filter, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Writes the given dataset to the given file using the TFRecord format. +// Converts the given variant tensor to an iterator and stores it in the given resource. // // Arguments: -// input_dataset: A variant tensor representing the dataset to write. -// filename: A scalar string tensor representing the filename to use. -// compression_type: A scalar string tensor containing either (i) the empty string (no -// compression), (ii) "ZLIB", or (iii) "GZIP". +// resource_handle: A handle to an iterator resource. +// serialized: A variant tensor storing the state of the iterator contained in the +// resource. // // Returns the created operation. -func ExperimentalDatasetToTFRecord(scope *Scope, input_dataset tf.Output, filename tf.Output, compression_type tf.Output) (o *tf.Operation) { +func DeserializeIterator(scope *Scope, resource_handle tf.Output, serialized tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "ExperimentalDatasetToTFRecord", + Type: "DeserializeIterator", Input: []tf.Input{ - input_dataset, filename, compression_type, + resource_handle, serialized, }, } return scope.AddOperation(opspec) } -// QueueCloseV2Attr is an optional argument to QueueCloseV2. -type QueueCloseV2Attr func(optionalAttr) - -// QueueCloseV2CancelPendingEnqueues sets the optional cancel_pending_enqueues attribute to value. -// -// value: If true, all pending enqueue requests that are -// blocked on the given queue will be canceled. 
-// If not specified, defaults to false -func QueueCloseV2CancelPendingEnqueues(value bool) QueueCloseV2Attr { - return func(m optionalAttr) { - m["cancel_pending_enqueues"] = value - } -} - -// Closes the given queue. +// Creates a dataset that shuffles and repeats elements from `input_dataset` // -// This operation signals that no more elements will be enqueued in the -// given queue. Subsequent Enqueue(Many) operations will fail. -// Subsequent Dequeue(Many) operations will continue to succeed if -// sufficient elements remain in the queue. Subsequent Dequeue(Many) -// operations that would block will fail immediately. +// pseudorandomly. // // Arguments: -// handle: The handle to a queue. // -// Returns the created operation. -func QueueCloseV2(scope *Scope, handle tf.Output, optional ...QueueCloseV2Attr) (o *tf.Operation) { +// buffer_size: The number of output elements to buffer in an iterator over +// this dataset. Compare with the `min_after_dequeue` attr when creating a +// `RandomShuffleQueue`. +// seed: A scalar seed for the random number generator. If either `seed` or +// `seed2` is set to be non-zero, the random number generator is seeded +// by the given seed. Otherwise, a random seed is used. +// seed2: A second scalar seed to avoid seed collision. +// count: A scalar representing the number of times the underlying dataset +// should be repeated. The default is `-1`, which results in infinite repetition. 
+// +// +func ShuffleAndRepeatDataset(scope *Scope, input_dataset tf.Output, buffer_size tf.Output, seed tf.Output, seed2 tf.Output, count tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "QueueCloseV2", + Type: "ShuffleAndRepeatDataset", Input: []tf.Input{ - handle, + input_dataset, buffer_size, seed, seed2, count, }, Attrs: attrs, } - return scope.AddOperation(opspec) + op := scope.AddOperation(opspec) + return op.Output(0) } -// Computes inverse hyperbolic tangent of x element-wise. -func Atanh(scope *Scope, x tf.Output) (y tf.Output) { +// Creates a dataset that caches elements from `input_dataset`. +// +// A CacheDataset will iterate over the input_dataset, and store tensors. If the +// cache already exists, the cache will be used. If the cache is inappropriate +// (e.g. cannot be opened, contains tensors of the wrong shape / size), an error +// will the returned when used. +// +// Arguments: +// +// filename: A path on the filesystem where we should cache the dataset. Note: this +// will be a directory. +// +// +func CacheDataset(scope *Scope, input_dataset tf.Output, filename tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "Atanh", + Type: "CacheDataset", Input: []tf.Input{ - x, + input_dataset, filename, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Returns true if queue is closed. -// -// This operation returns true if the queue is closed and false if the queue -// is open. +// Creates a dataset that emits the records from one or more binary files. 
// // Arguments: -// handle: The handle to a queue. -func QueueIsClosedV2(scope *Scope, handle tf.Output) (is_closed tf.Output) { +// filenames: A scalar or a vector containing the name(s) of the file(s) to be +// read. +// header_bytes: A scalar representing the number of bytes to skip at the +// beginning of a file. +// record_bytes: A scalar representing the number of bytes in each record. +// footer_bytes: A scalar representing the number of bytes to skip at the end +// of a file. +// buffer_size: A scalar representing the number of bytes to buffer. Must be > 0. +func FixedLengthRecordDataset(scope *Scope, filenames tf.Output, header_bytes tf.Output, record_bytes tf.Output, footer_bytes tf.Output, buffer_size tf.Output) (handle tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "QueueIsClosedV2", + Type: "FixedLengthRecordDataset", Input: []tf.Input{ - handle, + filenames, header_bytes, record_bytes, footer_bytes, buffer_size, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the absolute value of a tensor. +// Gradients for batch normalization. // -// Given a tensor `x`, this operation returns a tensor containing the absolute -// value of each element in `x`. For example, if x is an input element and y is -// an output element, this operation computes \\(y = |x|\\). -func Abs(scope *Scope, x tf.Output) (y tf.Output) { +// DEPRECATED at GraphDef version 9: Use tf.nn.batch_normalization() +// +// This op is deprecated. See `tf.nn.batch_normalization`. +// +// Arguments: +// t: A 4D input Tensor. +// m: A 1D mean Tensor with size matching the last dimension of t. +// This is the first output from tf.nn.moments, +// or a saved moving average thereof. +// v: A 1D variance Tensor with size matching the last dimension of t. +// This is the second output from tf.nn.moments, +// or a saved moving average thereof. +// gamma: A 1D gamma Tensor with size matching the last dimension of t. 
+// If "scale_after_normalization" is true, this Tensor will be multiplied +// with the normalized Tensor. +// backprop: 4D backprop Tensor. +// variance_epsilon: A small float number to avoid dividing by 0. +// scale_after_normalization: A bool indicating whether the resulted tensor +// needs to be multiplied with gamma. +// +// Returns 4D backprop tensor for input.1D backprop tensor for mean.1D backprop tensor for variance.1D backprop tensor for beta.1D backprop tensor for gamma. +func BatchNormWithGlobalNormalizationGrad(scope *Scope, t tf.Output, m tf.Output, v tf.Output, gamma tf.Output, backprop tf.Output, variance_epsilon float32, scale_after_normalization bool) (dx tf.Output, dm tf.Output, dv tf.Output, db tf.Output, dg tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"variance_epsilon": variance_epsilon, "scale_after_normalization": scale_after_normalization} opspec := tf.OpSpec{ - Type: "Abs", + Type: "BatchNormWithGlobalNormalizationGrad", Input: []tf.Input{ - x, + t, m, v, gamma, backprop, }, + Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// StackV2Attr is an optional argument to StackV2. -type StackV2Attr func(optionalAttr) - -// StackV2StackName sets the optional stack_name attribute to value. -// -// value: Overrides the name used for the temporary stack resource. Default -// value is the name of the 'Stack' op (which is guaranteed unique). -// If not specified, defaults to "" -func StackV2StackName(value string) StackV2Attr { - return func(m optionalAttr) { - m["stack_name"] = value - } + return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) } -// A stack that produces elements in first-in last-out order. +// Creates a dataset that emits the records from one or more TFRecord files. // // Arguments: -// max_size: The maximum size of the stack if non-negative. If negative, the stack -// size is unlimited. -// elem_type: The type of the elements on the stack. 
-// -// Returns The handle to the stack. -func StackV2(scope *Scope, max_size tf.Output, elem_type tf.DataType, optional ...StackV2Attr) (handle tf.Output) { +// filenames: A scalar or vector containing the name(s) of the file(s) to be +// read. +// compression_type: A scalar containing either (i) the empty string (no +// compression), (ii) "ZLIB", or (iii) "GZIP". +// buffer_size: A scalar representing the number of bytes to buffer. A value of +// 0 means no buffering will be performed. +func TFRecordDataset(scope *Scope, filenames tf.Output, compression_type tf.Output, buffer_size tf.Output) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"elem_type": elem_type} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "StackV2", + Type: "TFRecordDataset", Input: []tf.Input{ - max_size, + filenames, compression_type, buffer_size, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// OrderedMapStageAttr is an optional argument to OrderedMapStage. -type OrderedMapStageAttr func(optionalAttr) - -// OrderedMapStageCapacity sets the optional capacity attribute to value. -// -// value: Maximum number of elements in the Staging Area. If > 0, inserts -// on the container will block when the capacity is reached. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapStageCapacity(value int64) OrderedMapStageAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// OrderedMapStageMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func OrderedMapStageMemoryLimit(value int64) OrderedMapStageAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} +// ExperimentalStatsAggregatorHandleAttr is an optional argument to ExperimentalStatsAggregatorHandle. 
+type ExperimentalStatsAggregatorHandleAttr func(optionalAttr) -// OrderedMapStageContainer sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. Otherwise, -// a default container is used. +// ExperimentalStatsAggregatorHandleContainer sets the optional container attribute to value. // If not specified, defaults to "" -func OrderedMapStageContainer(value string) OrderedMapStageAttr { +func ExperimentalStatsAggregatorHandleContainer(value string) ExperimentalStatsAggregatorHandleAttr { return func(m optionalAttr) { m["container"] = value } } -// OrderedMapStageSharedName sets the optional shared_name attribute to value. -// -// value: It is necessary to match this name to the matching Unstage Op. +// ExperimentalStatsAggregatorHandleSharedName sets the optional shared_name attribute to value. // If not specified, defaults to "" -func OrderedMapStageSharedName(value string) OrderedMapStageAttr { +func ExperimentalStatsAggregatorHandleSharedName(value string) ExperimentalStatsAggregatorHandleAttr { return func(m optionalAttr) { m["shared_name"] = value } } -// Stage (key, values) in the underlying container which behaves like a ordered -// -// associative container. Elements are ordered by key. -// -// Arguments: -// key: int64 -// -// values: a list of tensors -// dtypes A list of data types that inserted values should adhere to. -// -// -// Returns the created operation. -func OrderedMapStage(scope *Scope, key tf.Output, indices tf.Output, values []tf.Output, dtypes []tf.DataType, optional ...OrderedMapStageAttr) (o *tf.Operation) { +// Creates a statistics manager resource. 
+func ExperimentalStatsAggregatorHandle(scope *Scope, optional ...ExperimentalStatsAggregatorHandleAttr) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtypes": dtypes} + attrs := map[string]interface{}{} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "OrderedMapStage", - Input: []tf.Input{ - key, indices, tf.OutputList(values), - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// RpcAttr is an optional argument to Rpc. -type RpcAttr func(optionalAttr) + Type: "ExperimentalStatsAggregatorHandle", -// RpcProtocol sets the optional protocol attribute to value. -// -// value: RPC protocol to use. Empty string means use the default protocol. -// Options include 'grpc'. -// If not specified, defaults to "" -func RpcProtocol(value string) RpcAttr { - return func(m optionalAttr) { - m["protocol"] = value + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// RpcFailFast sets the optional fail_fast attribute to value. +// A container for an iterator resource. // -// value: `boolean`. If `true` (default), then failures to connect -// (i.e., the server does not immediately respond) cause an RPC failure. -// If not specified, defaults to true -func RpcFailFast(value bool) RpcAttr { - return func(m optionalAttr) { - m["fail_fast"] = value +// Returns A handle to the iterator that can be passed to a "MakeIterator" or +// "IteratorGetNext" op. In contrast to Iterator, AnonymousIterator prevents +// resource sharing by name, and does not keep a reference to the resource +// container. +func AnonymousIterator(scope *Scope, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { + if scope.Err() != nil { + return } -} + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "AnonymousIterator", -// RpcTimeoutInMs sets the optional timeout_in_ms attribute to value. -// -// value: `int`. 
If `0` (default), then the kernel will run the RPC -// request and only time out if the RPC deadline passes or the session times out. -// If this value is greater than `0`, then the op will raise an exception if -// the RPC takes longer than `timeout_in_ms`. -// If not specified, defaults to 0 -func RpcTimeoutInMs(value int64) RpcAttr { - return func(m optionalAttr) { - m["timeout_in_ms"] = value + Attrs: attrs, } + op := scope.AddOperation(opspec) + return op.Output(0) } -// Perform batches of RPC requests. -// -// This op asynchronously performs either a single RPC request, or a batch -// of requests. RPC requests are defined by three main parameters: -// -// - `address` (the host+port or BNS address of the request) -// - `method` (the RPC method name for the request) -// - `request` (the serialized proto string, or vector of strings, -// of the RPC request argument). -// -// For example, if you have an RPC service running on port localhost:2345, -// and its interface is configured with the following proto declaration: -// -// ``` -// service MyService { -// rpc MyMethod(MyRequestProto) returns (MyResponseProto) { -// } -// }; -// ``` -// -// then call this op with arguments: -// -// ``` -// address = "localhost:2345" -// method = "MyService/MyMethod" -// ``` -// -// The `request` tensor is a string tensor representing serialized `MyRequestProto` -// strings; and the output string tensor `response` will have the same shape -// and contain (upon successful completion) corresponding serialized -// `MyResponseProto` strings. -// -// For example, to send a single, empty, `MyRequestProto`, call -// this op with `request = ""`. To send 5 **parallel** empty requests, -// call this op with `request = ["", "", "", "", ""]`. 
-// -// More generally, one can create a batch of `MyRequestProto` serialized protos -// from regular batched tensors using the `encode_proto` op, and convert -// the response `MyResponseProto` serialized protos to batched tensors -// using the `decode_proto` op. +// Adjust the contrast of one or more images. // -// **NOTE** Working with serialized proto strings is faster than instantiating -// actual proto objects in memory, so no performance degradation is expected -// compared to writing custom kernels for this workflow. +// `images` is a tensor of at least 3 dimensions. The last 3 dimensions are +// interpreted as `[height, width, channels]`. The other dimensions only +// represent a collection of images, such as `[batch, height, width, channels].` // -// If the connection fails or the remote worker returns an error -// status, the op reraises this exception locally. +// Contrast is adjusted independently for each channel of each image. // -// See the `TryRpc` op if you prefer to handle RPC failures manually in the graph. +// For each channel, the Op first computes the mean of the image pixels in the +// channel and then adjusts each component of each pixel to +// `(x - mean) * contrast_factor + mean`. // // Arguments: -// address: `0-D` or `1-D`. The address (i.e. host_name:port) of the RPC server. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `method` and `request`. -// method: `0-D` or `1-D`. The method address on the RPC server. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `address` and `request`. -// request: `0-D` or `1-D`. Serialized proto strings: the rpc request argument. -// If this tensor has more than 1 element, then multiple parallel rpc requests -// are sent. This argument broadcasts with `address` and `method`. +// images: Images to adjust. At least 3-D. 
+// contrast_factor: A float multiplier for adjusting contrast. // -// Returns Same shape as `request`. Serialized proto strings: the rpc responses. -func Rpc(scope *Scope, address tf.Output, method tf.Output, request tf.Output, optional ...RpcAttr) (response tf.Output) { +// Returns The contrast-adjusted image or images. +func AdjustContrastv2(scope *Scope, images tf.Output, contrast_factor tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "Rpc", + Type: "AdjustContrastv2", Input: []tf.Input{ - address, method, request, + images, contrast_factor, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Records the bytes size of each element of `input_dataset` in a StatsAggregator. -func ExperimentalBytesProducedStatsDataset(scope *Scope, input_dataset tf.Output, tag tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { +// Gets the next output from the given iterator . +func IteratorGetNext(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { if scope.Err() != nil { return } attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "ExperimentalBytesProducedStatsDataset", + Type: "IteratorGetNext", Input: []tf.Input{ - input_dataset, tag, + iterator, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("IteratorGetNext", err) + return + } + return components } -// StackPushV2Attr is an optional argument to StackPushV2. -type StackPushV2Attr func(optionalAttr) - -// StackPushV2SwapMemory sets the optional swap_memory attribute to value. 
+// Outputs the single element from the given dataset. // -// value: Swap `elem` to CPU. Default to false. -// If not specified, defaults to false -func StackPushV2SwapMemory(value bool) StackPushV2Attr { - return func(m optionalAttr) { - m["swap_memory"] = value +// Arguments: +// dataset: A handle to a dataset that contains a single element. +// +// +// +// Returns The components of the single element of `input`. +func DatasetToSingleElement(scope *Scope, dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "DatasetToSingleElement", + Input: []tf.Input{ + dataset, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("DatasetToSingleElement", err) + return } + return components } -// Push an element onto the stack. +// Converts the given `resource_handle` representing an iterator to a string. // // Arguments: -// handle: The handle to a stack. -// elem: The tensor to be pushed onto the stack. +// resource_handle: A handle to an iterator resource. // -// Returns The same tensor as the input 'elem'. -func StackPushV2(scope *Scope, handle tf.Output, elem tf.Output, optional ...StackPushV2Attr) (output tf.Output) { +// Returns A string representation of the given handle. 
+func IteratorToStringHandle(scope *Scope, resource_handle tf.Output) (string_handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } opspec := tf.OpSpec{ - Type: "StackPushV2", + Type: "IteratorToStringHandle", Input: []tf.Input{ - handle, elem, + resource_handle, }, - Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// FusedBatchNormGradV2Attr is an optional argument to FusedBatchNormGradV2. -type FusedBatchNormGradV2Attr func(optionalAttr) +// IteratorFromStringHandleAttr is an optional argument to IteratorFromStringHandle. +type IteratorFromStringHandleAttr func(optionalAttr) -// FusedBatchNormGradV2Epsilon sets the optional epsilon attribute to value. +// IteratorFromStringHandleOutputTypes sets the optional output_types attribute to value. // -// value: A small float number added to the variance of x. -// If not specified, defaults to 0.0001 -func FusedBatchNormGradV2Epsilon(value float32) FusedBatchNormGradV2Attr { - return func(m optionalAttr) { - m["epsilon"] = value - } -} - -// FusedBatchNormGradV2DataFormat sets the optional data_format attribute to value. +// value: If specified, defines the type of each tuple component in an +// element produced by the resulting iterator. +// If not specified, defaults to <> // -// value: The data format for y_backprop, x, x_backprop. -// Either "NHWC" (default) or "NCHW". -// If not specified, defaults to "NHWC" -func FusedBatchNormGradV2DataFormat(value string) FusedBatchNormGradV2Attr { +// REQUIRES: len(value) >= 0 +func IteratorFromStringHandleOutputTypes(value []tf.DataType) IteratorFromStringHandleAttr { return func(m optionalAttr) { - m["data_format"] = value + m["output_types"] = value } } -// FusedBatchNormGradV2IsTraining sets the optional is_training attribute to value. +// IteratorFromStringHandleOutputShapes sets the optional output_shapes attribute to value. 
// -// value: A bool value to indicate the operation is for training (default) -// or inference. -// If not specified, defaults to true -func FusedBatchNormGradV2IsTraining(value bool) FusedBatchNormGradV2Attr { +// value: If specified, defines the shape of each tuple component in an +// element produced by the resulting iterator. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func IteratorFromStringHandleOutputShapes(value []tf.Shape) IteratorFromStringHandleAttr { return func(m optionalAttr) { - m["is_training"] = value + m["output_shapes"] = value } } -// Gradient for batch normalization. -// -// Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW". -// The size of 1D Tensors matches the dimension C of the 4D Tensors. +// Converts the given string representing a handle to an iterator to a resource. // // Arguments: -// y_backprop: A 4D Tensor for the gradient with respect to y. -// x: A 4D Tensor for input data. -// scale: A 1D Tensor for scaling factor, to scale the normalized x. -// reserve_space_1: When is_training is True, a 1D Tensor for the computed batch -// mean to be reused in gradient computation. When is_training is -// False, a 1D Tensor for the population mean to be reused in both -// 1st and 2nd order gradient computation. -// reserve_space_2: When is_training is True, a 1D Tensor for the computed batch -// variance (inverted variance in the cuDNN case) to be reused in -// gradient computation. When is_training is False, a 1D Tensor -// for the population variance to be reused in both 1st and 2nd -// order gradient computation. +// string_handle: A string representation of the given handle. // -// Returns A 4D Tensor for the gradient with respect to x.A 1D Tensor for the gradient with respect to scale.A 1D Tensor for the gradient with respect to offset.Unused placeholder to match the mean input in FusedBatchNorm.Unused placeholder to match the variance input -// in FusedBatchNorm. 
-func FusedBatchNormGradV2(scope *Scope, y_backprop tf.Output, x tf.Output, scale tf.Output, reserve_space_1 tf.Output, reserve_space_2 tf.Output, optional ...FusedBatchNormGradV2Attr) (x_backprop tf.Output, scale_backprop tf.Output, offset_backprop tf.Output, reserve_space_3 tf.Output, reserve_space_4 tf.Output) { +// Returns A handle to an iterator resource. +func IteratorFromStringHandle(scope *Scope, string_handle tf.Output, optional ...IteratorFromStringHandleAttr) (resource_handle tf.Output) { if scope.Err() != nil { return } @@ -35665,336 +38305,367 @@ func FusedBatchNormGradV2(scope *Scope, y_backprop tf.Output, x tf.Output, scale a(attrs) } opspec := tf.OpSpec{ - Type: "FusedBatchNormGradV2", + Type: "IteratorFromStringHandle", Input: []tf.Input{ - y_backprop, x, scale, reserve_space_1, reserve_space_2, + string_handle, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2), op.Output(3), op.Output(4) + return op.Output(0) } -// Creates a TensorArray for storing multiple gradients of values in the given handle. +// Gather slices from `params` axis `axis` according to `indices`. // -// Similar to TensorArrayGradV3. However it creates an accumulator with an -// expanded shape compared to the input TensorArray whose gradient is being -// computed. This enables multiple gradients for the same TensorArray to be -// calculated using the same accumulator. +// `indices` must be an integer tensor of any dimension (usually 0-D or 1-D). +// Produces an output tensor with shape `params.shape[:axis] + indices.shape + +// params.shape[axis + 1:]` where: +// +// ```python +// # Scalar indices (output is rank(params) - 1). +// output[a_0, ..., a_n, b_0, ..., b_n] = +// params[a_0, ..., a_n, indices, b_0, ..., b_n] +// +// # Vector indices (output is rank(params)). 
+// output[a_0, ..., a_n, i, b_0, ..., b_n] = +// params[a_0, ..., a_n, indices[i], b_0, ..., b_n] +// +// # Higher rank indices (output is rank(params) + rank(indices) - 1). +// output[a_0, ..., a_n, i, ..., j, b_0, ... b_n] = +// params[a_0, ..., a_n, indices[i, ..., j], b_0, ..., b_n] +// ``` +// +//
+// +//
+// +// Note that on CPU, if an out of bound index is found, an error is returned. +// On GPU, if an out of bound index is found, a 0 is stored in the +// corresponding output value. +// +// See also `tf.batch_gather` and `tf.gather_nd`. // // Arguments: -// handle: The handle to the forward TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// shape_to_prepend: An int32 vector representing a shape. Elements in the gradient accumulator will -// have shape which is this shape_to_prepend value concatenated with shape of the -// elements in the TensorArray corresponding to the input handle. -// source: The gradient source string, used to decide which gradient TensorArray -// to return. -func TensorArrayGradWithShape(scope *Scope, handle tf.Output, flow_in tf.Output, shape_to_prepend tf.Output, source string) (grad_handle tf.Output, flow_out tf.Output) { +// params: The tensor from which to gather values. Must be at least rank +// `axis + 1`. +// indices: Index tensor. Must be in range `[0, params.shape[axis])`. +// axis: The axis in `params` to gather `indices` from. Defaults to the first +// dimension. Supports negative indexes. +// +// Returns Values from `params` gathered from indices given by `indices`, with +// shape `params.shape[:axis] + indices.shape + params.shape[axis + 1:]`. +func GatherV2(scope *Scope, params tf.Output, indices tf.Output, axis tf.Output) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"source": source} opspec := tf.OpSpec{ - Type: "TensorArrayGradWithShape", + Type: "GatherV2", Input: []tf.Input{ - handle, flow_in, shape_to_prepend, + params, indices, axis, }, - Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0) } -// Compare values of `input` to `threshold` and pack resulting bits into a `uint8`. -// -// Each comparison returns a boolean `true` (if `input_value > threshold`) -// or and `false` otherwise. 
-// -// This operation is useful for Locality-Sensitive-Hashing (LSH) and other -// algorithms that use hashing approximations of cosine and `L2` distances; -// codes can be generated from an input via: -// -// ```python -// codebook_size = 50 -// codebook_bits = codebook_size * 32 -// codebook = tf.get_variable('codebook', [x.shape[-1].value, codebook_bits], -// dtype=x.dtype, -// initializer=tf.orthogonal_initializer()) -// codes = compare_and_threshold(tf.matmul(x, codebook), threshold=0.) -// codes = tf.bitcast(codes, tf.int32) # go from uint8 to int32 -// # now codes has shape x.shape[:-1] + [codebook_size] -// ``` -// -// **NOTE**: Currently, the innermost dimension of the tensor must be divisible -// by 8. -// -// Given an `input` shaped `[s0, s1, ..., s_n]`, the output is -// a `uint8` tensor shaped `[s0, s1, ..., s_n / 8]`. +// Converts the given `resource_handle` representing an iterator to a variant tensor. // // Arguments: -// input: Values to compare against `threshold` and bitpack. -// threshold: Threshold to compare against. +// resource_handle: A handle to an iterator resource. // -// Returns The bitpacked comparisons. -func CompareAndBitpack(scope *Scope, input tf.Output, threshold tf.Output) (output tf.Output) { +// Returns A variant tensor storing the state of the iterator contained in the +// resource. +func SerializeIterator(scope *Scope, resource_handle tf.Output) (serialized tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "CompareAndBitpack", + Type: "SerializeIterator", Input: []tf.Input{ - input, threshold, + resource_handle, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Push an element onto the tensor_array. +// Deserializes a proto into the tree handle // // Arguments: -// handle: The handle to a TensorArray. -// index: The position to write to inside the TensorArray. -// value: The tensor to write to the TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. 
+// tree_handle: Handle to the tree resource to be restored. +// tree_config: Serialied proto string of the boosted_trees.Tree proto. // -// Returns A float scalar that enforces proper chaining of operations. -func TensorArrayWriteV3(scope *Scope, handle tf.Output, index tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { +// Returns the created operation. +func TensorForestTreeDeserialize(scope *Scope, tree_handle tf.Output, tree_config tf.Output) (o *tf.Operation) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorArrayWriteV3", + Type: "TensorForestTreeDeserialize", Input: []tf.Input{ - handle, index, value, flow_in, + tree_handle, tree_config, }, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } -// Scatter the data from the input value into specific TensorArray elements. -// -// `indices` must be a vector, its length must match the first dim of `value`. -// -// Arguments: -// handle: The handle to a TensorArray. -// indices: The locations at which to write the tensor elements. -// value: The concatenated tensor to write to the TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// -// Returns A float scalar that enforces proper chaining of operations. -func TensorArrayScatterV3(scope *Scope, handle tf.Output, indices tf.Output, value tf.Output, flow_in tf.Output) (flow_out tf.Output) { +// Constructs an Optional variant from a tuple of tensors. +func OptionalFromValue(scope *Scope, components []tf.Output) (optional tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorArrayScatterV3", + Type: "OptionalFromValue", Input: []tf.Input{ - handle, indices, value, flow_in, + tf.OutputList(components), }, } op := scope.AddOperation(opspec) return op.Output(0) } -// EmptyAttr is an optional argument to Empty. -type EmptyAttr func(optionalAttr) +// DecodeProtoV2Attr is an optional argument to DecodeProtoV2. 
+type DecodeProtoV2Attr func(optionalAttr) -// EmptyInit sets the optional init attribute to value. +// DecodeProtoV2DescriptorSource sets the optional descriptor_source attribute to value. // -// value: If True, initialize the returned tensor with the default value of dtype. Otherwise, the implementation is free not to initializethe tensor's content. -// If not specified, defaults to false -func EmptyInit(value bool) EmptyAttr { +// value: Either the special value `local://` or a path to a file containing +// a serialized `FileDescriptorSet`. +// If not specified, defaults to "local://" +func DecodeProtoV2DescriptorSource(value string) DecodeProtoV2Attr { return func(m optionalAttr) { - m["init"] = value + m["descriptor_source"] = value } } -// Creates a tensor with the given shape. -// -// This operation creates a tensor of `shape` and `dtype`. -// -// Arguments: -// shape: 1-D. Represents the shape of the output tensor. -// +// DecodeProtoV2MessageFormat sets the optional message_format attribute to value. // -// Returns A `Tensor` of type `T`. -func Empty(scope *Scope, shape tf.Output, dtype tf.DataType, optional ...EmptyAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtype": dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "Empty", - Input: []tf.Input{ - shape, - }, - Attrs: attrs, +// value: Either `binary` or `text`. +// If not specified, defaults to "binary" +func DecodeProtoV2MessageFormat(value string) DecodeProtoV2Attr { + return func(m optionalAttr) { + m["message_format"] = value } - op := scope.AddOperation(opspec) - return op.Output(0) } -// TensorArrayConcatV3Attr is an optional argument to TensorArrayConcatV3. -type TensorArrayConcatV3Attr func(optionalAttr) - -// TensorArrayConcatV3ElementShapeExcept0 sets the optional element_shape_except0 attribute to value. +// DecodeProtoV2Sanitize sets the optional sanitize attribute to value. 
// -// value: The expected shape of an element, if known, -// excluding the first dimension. Used to validate the shapes of -// TensorArray elements. If this shape is not fully specified, concatenating -// zero-size TensorArrays is an error. -// If not specified, defaults to -func TensorArrayConcatV3ElementShapeExcept0(value tf.Shape) TensorArrayConcatV3Attr { +// value: Whether to sanitize the result or not. +// If not specified, defaults to false +func DecodeProtoV2Sanitize(value bool) DecodeProtoV2Attr { return func(m optionalAttr) { - m["element_shape_except0"] = value + m["sanitize"] = value } } -// Concat the elements from the TensorArray into value `value`. +// The op extracts fields from a serialized protocol buffers message into tensors. // -// Takes `T` elements of shapes +// The `decode_proto` op extracts fields from a serialized protocol buffers +// message into tensors. The fields in `field_names` are decoded and converted +// to the corresponding `output_types` if possible. // -// ``` -// (n0 x d0 x d1 x ...), (n1 x d0 x d1 x ...), ..., (n(T-1) x d0 x d1 x ...) -// ``` +// A `message_type` name must be provided to give context for the field +// names. The actual message descriptor can be looked up either in the +// linked-in descriptor pool or a filename provided by the caller using +// the `descriptor_source` attribute. // -// and concatenates them into a Tensor of shape: +// Each output tensor is a dense tensor. This means that it is padded to +// hold the largest number of repeated elements seen in the input +// minibatch. (The shape is also padded by one to prevent zero-sized +// dimensions). The actual repeat counts for each example in the +// minibatch can be found in the `sizes` output. In many cases the output +// of `decode_proto` is fed immediately into tf.squeeze if missing values +// are not a concern. When using tf.squeeze, always pass the squeeze +// dimension explicitly to avoid surprises. // -// ```(n0 + n1 + ... 
+ n(T-1) x d0 x d1 x ...)``` +// For the most part, the mapping between Proto field types and +// TensorFlow dtypes is straightforward. However, there are a few +// special cases: // -// All elements must have the same shape (excepting the first dimension). +// - A proto field that contains a submessage or group can only be converted +// to `DT_STRING` (the serialized submessage). This is to reduce the +// complexity of the API. The resulting string can be used as input +// to another instance of the decode_proto op. +// +// - TensorFlow lacks support for unsigned integers. The ops represent uint64 +// types as a `DT_INT64` with the same twos-complement bit pattern +// (the obvious way). Unsigned int32 values can be represented exactly by +// specifying type `DT_INT64`, or using twos-complement if the caller +// specifies `DT_INT32` in the `output_types` attribute. +// +// The `descriptor_source` attribute selects a source of protocol +// descriptors to consult when looking up `message_type`. This may be a +// filename containing a serialized `FileDescriptorSet` message, +// or the special value `local://`, in which case only descriptors linked +// into the code will be searched; the filename can be on any filesystem +// accessible to TensorFlow. +// +// You can build a `descriptor_source` file using the `--descriptor_set_out` +// and `--include_imports` options to the protocol compiler `protoc`. +// +// The `local://` database only covers descriptors linked into the +// code via C++ libraries, not Python imports. You can link in a proto descriptor +// by creating a cc_library target with alwayslink=1. +// +// Both binary and text proto serializations are supported, and can be +// chosen using the `message_format` attribute. // // Arguments: -// handle: The handle to a TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// dtype: The type of the elem that is returned. +// bytes: Tensor of serialized protos with shape `batch_shape`.
+// message_type: Name of the proto message type to decode. +// field_names: List of strings containing proto field names. An extension field can be decoded +// by using its full name, e.g. EXT_PACKAGE.EXT_FIELD_NAME. +// output_types: List of TF types to use for the respective field in field_names. // -// Returns All of the elements in the TensorArray, concatenated along the first -// axis.A vector of the row sizes of the original T elements in the -// value output. In the example above, this would be the values: -// `(n1, n2, ..., n(T-1))`. -func TensorArrayConcatV3(scope *Scope, handle tf.Output, flow_in tf.Output, dtype tf.DataType, optional ...TensorArrayConcatV3Attr) (value tf.Output, lengths tf.Output) { +// Returns Tensor of int32 with shape `[batch_shape, len(field_names)]`. +// Each entry is the number of values found for the corresponding field. +// Optional fields may have 0 or 1 values.List of tensors containing values for the corresponding field. +// `values[i]` has datatype `output_types[i]` +// and shape `[batch_shape, max(sizes[...,i])]`. 
+func DecodeProtoV2(scope *Scope, bytes tf.Output, message_type string, field_names []string, output_types []tf.DataType, optional ...DecodeProtoV2Attr) (sizes tf.Output, values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"dtype": dtype} + attrs := map[string]interface{}{"message_type": message_type, "field_names": field_names, "output_types": output_types} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "TensorArrayConcatV3", + Type: "DecodeProtoV2", Input: []tf.Input{ - handle, flow_in, + bytes, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + if scope.Err() != nil { + return + } + var idx int + var err error + sizes = op.Output(idx) + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("DecodeProtoV2", err) + return + } + return sizes, values } -// Split the data from the input value into TensorArray elements. -// -// Assuming that `lengths` takes on values -// -// ```(n0, n1, ..., n(T-1))``` -// -// and that `value` has shape -// -// ```(n0 + n1 + ... + n(T-1) x d0 x d1 x ...)```, -// -// this splits values into a TensorArray with T tensors. -// -// TensorArray index t will be the subtensor of values with starting position -// -// ```(n0 + n1 + ... + n(t-1), 0, 0, ...)``` -// -// and having size -// -// ```nt x d0 x d1 x ...``` -// -// Arguments: -// handle: The handle to a TensorArray. -// value: The concatenated tensor to write to the TensorArray. -// lengths: The vector of lengths, how to split the rows of value into the -// TensorArray. -// flow_in: A float scalar that enforces proper chaining of operations. -// -// Returns A float scalar that enforces proper chaining of operations. -func TensorArraySplitV3(scope *Scope, handle tf.Output, value tf.Output, lengths tf.Output, flow_in tf.Output) (flow_out tf.Output) { +// Creates an Optional variant with no value. 
+func OptionalNone(scope *Scope) (optional tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorArraySplitV3", + Type: "OptionalNone", + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Returns true if and only if the given Optional variant has a value. +func OptionalHasValue(scope *Scope, optional tf.Output) (has_value tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "OptionalHasValue", Input: []tf.Input{ - handle, value, lengths, flow_in, + optional, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes gradients for the scaled exponential linear (Selu) operation. -// -// Arguments: -// gradients: The backpropagated gradients to the corresponding Selu operation. -// outputs: The outputs of the corresponding Selu operation. -// -// Returns The gradients: `gradients * (outputs + scale * alpha)` -// if outputs < 0, `scale * gradients` otherwise. -func SeluGrad(scope *Scope, gradients tf.Output, outputs tf.Output) (backprops tf.Output) { +// Returns the value stored in an Optional variant or raises an error if none exists. +func OptionalGetValue(scope *Scope, optional tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (components []tf.Output) { if scope.Err() != nil { return } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "SeluGrad", + Type: "OptionalGetValue", Input: []tf.Input{ - gradients, outputs, + optional, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if components, idx, err = makeOutputList(op, idx, "components"); err != nil { + scope.UpdateErr("OptionalGetValue", err) + return + } + return components +} + +// Gets the next output from the given iterator as an Optional variant. 
+func IteratorGetNextAsOptional(scope *Scope, iterator tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (optional tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} + opspec := tf.OpSpec{ + Type: "IteratorGetNextAsOptional", + Input: []tf.Input{ + iterator, }, + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Get the current size of the TensorArray. +// Fast Fourier transform. +// +// Computes the 1-dimensional discrete Fourier transform over the inner-most +// dimension of `input`. // // Arguments: -// handle: The handle to a TensorArray (output of TensorArray or TensorArrayGrad). -// flow_in: A float scalar that enforces proper chaining of operations. +// input: A complex tensor. // -// Returns The current size of the TensorArray. -func TensorArraySizeV3(scope *Scope, handle tf.Output, flow_in tf.Output) (size tf.Output) { +// Returns A complex tensor of the same shape as `input`. The inner-most +// dimension of `input` is replaced with its 1D Fourier transform. +// +// @compatibility(numpy) +// Equivalent to np.fft.fft +// @end_compatibility +func FFT(scope *Scope, input tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "TensorArraySizeV3", + Type: "FFT", Input: []tf.Input{ - handle, flow_in, + input, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// Deprecated. Use TensorArrayGradV3 +// Identity transformation that models performance. // -// DEPRECATED at GraphDef version 26: Use TensorArrayGradV3 -func TensorArrayGradV2(scope *Scope, handle tf.Output, flow_in tf.Output, source string) (grad_handle tf.Output) { +// Identity transformation that models performance. +// +// Arguments: +// input_dataset: A variant tensor representing the input dataset. 
+// +// +func ModelDataset(scope *Scope, input_dataset tf.Output, output_types []tf.DataType, output_shapes []tf.Shape) (handle tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"source": source} + attrs := map[string]interface{}{"output_types": output_types, "output_shapes": output_shapes} opspec := tf.OpSpec{ - Type: "TensorArrayGradV2", + Type: "ModelDataset", Input: []tf.Input{ - handle, flow_in, + input_dataset, }, Attrs: attrs, } -- GitLab From 737c9072a712f39117a9d213aab81ed9ba17db5f Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 13 Feb 2019 18:51:28 -0800 Subject: [PATCH 110/351] [XLA] Don't return 1 for a random floating point in range [0, 1). The old implementation of this routine was converting a u32 I to a random f32 F in range by doing uint32 I; float F = static_cast<float>(I) / static_cast<float>(2^32); But this will return 1 if that's the closest float to I / 2^32, and we want a float in range [0, 1), not [0, 1]. This is significant particularly for normally-distributed random numbers, because in that case we end up computing erfinv(1) = inf and causing infinities to appear in our normal distribution. Instead, we manually construct a floating-point value. Also fix the interpreter in the same way and add a comment to HloInstruction::CreateRng.
PiperOrigin-RevId: 233874065 --- .../xla/service/elemental_ir_emitter.cc | 79 ++++++++++++++----- .../xla/service/elemental_ir_emitter.h | 3 + .../xla/service/hlo_evaluator_typed_visitor.h | 21 ++++- .../compiler/xla/service/hlo_instruction.h | 8 ++ 4 files changed, 89 insertions(+), 22 deletions(-) diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index e868dc6d88..808929be75 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -1367,26 +1367,69 @@ StatusOr ElementalIrEmitter::ConvertValueForDistribution( llvm_ir::PrimitiveTypeToIrType(elem_prim_ty, module_); llvm::Type* raw_value_ty = raw_value->getType(); - // Convert raw integer to float in range [0, 1) if the element is a float. + // If we're generating a floating-point value, convert the raw integer R (i.e. + // `raw_value`) to a float in the range [0, 1). + // + // The basic approach is to choose a significand and exponent such that the + // significand is uniformly distributed and the exponent is distributed, well, + // exponentially (it's more likely to be close to 0 than far from 0). + // + // An easy way to do this is to say that the significand is the first S bits + // of R, and the exponent is determined by the number of trailing zeroes in R, + // exp = 2^-(cttz(R) + 1). (+1 because the largest exponent should be -1; + // this way the largest value we can return is 1.999... * 2^-1 = 1-ε.) + // + // This results in a small bias. Namely, if R has enough trailing zeroes, the + // significand and exponent will "overlap". As a concrete example, consider + // + // 20 X's 12 zeroes + // R = 0bXXXXXXXXXXXXXXXXXXXX000000000000 + // + // Here the exponent is 2^-13 because R has 12 trailing zeroes. The + // significand is made up of the first 23 most-significant bits of R, which we + // observe contain 3 zeroes. 
This is biased because any random value with + // exponent 2^-13 will have a significand which ends in `000`. + // + // For f32s, this problem occurs only when there are more than 32-23 = 9 + // trailing zeros, which happens with probability 0.5^10 = ~0.1%. Moreover the + // probability of a large bias (i.e. many trailing 0s in the significand) is + // exponentially low. So we deem this acceptable. llvm::Value* elem_value = raw_value; if (elem_ir_ty->isFloatingPointTy()) { - unsigned raw_value_size_in_bits = raw_value_ty->getPrimitiveSizeInBits(); - CHECK(raw_value_size_in_bits == 32 || raw_value_size_in_bits == 64); - // Perform the division using the float type with the same number of bits - // as the raw value to avoid overflow. - if (raw_value_size_in_bits == 32) { - elem_value = UIToFP(elem_value, b_->getFloatTy()); - elem_value = FDiv(elem_value, - llvm::ConstantFP::get(b_->getFloatTy(), std::exp2(32))); - } else { - elem_value = UIToFP(elem_value, b_->getDoubleTy()); - elem_value = FDiv( - elem_value, llvm::ConstantFP::get(b_->getDoubleTy(), std::exp2(64))); - } - - if (elem_ir_ty != elem_value->getType()) { - elem_value = FPTrunc(elem_value, elem_ir_ty); - } + const auto& dest_flt_semantics = elem_ir_ty->getFltSemantics(); + const int bits = raw_value_ty->getPrimitiveSizeInBits(); + CHECK_GE(bits, llvm::APFloat::semanticsSizeInBits(dest_flt_semantics)); + + // Subtract 1 because semanticsPrecision includes the "hidden bit", i.e. the + // implicit "1." at the beginning of the significand. + const int significand_bits = + llvm::APFloat::semanticsPrecision(dest_flt_semantics) - 1; + + llvm::Value* cttz = llvm_ir::EmitCallToIntrinsic( + llvm::Intrinsic::cttz, {raw_value, /*is_zero_undef=*/b_->getFalse()}, + {raw_value->getType()}, b_); + llvm::Value* significand = LShr(raw_value, bits - significand_bits); + + // Exponent bias is -127 for f32, meaning that if the exponent is E and the + // significand is S, then the value of the number is 2^(E - 127) * (1.S).
+ // + // We want cttz == 0 to correspond to 2^-1, so our exponent is computed as + // E = 126 - cttz. + // + // For f64, this is all the same, except the bias is -1023. + // + // In IEEE floating point, the absolute value of the exponent bias equals + // the value of the largest possible exponent. + const int bias = -llvm::APFloat::semanticsMaxExponent(dest_flt_semantics); + llvm::Value* exponent = + Sub(llvm::ConstantInt::get(cttz->getType(), -bias - 1), cttz); + + // Now just slot everything into place! The `Trunc` is here because + // raw_value may be larger than our float destination. + elem_value = + BitCast(Trunc(Or(Shl(exponent, significand_bits), significand), + b_->getIntNTy(elem_ir_ty->getPrimitiveSizeInBits())), + elem_ir_ty); } // Convert the value for the requested distribution. diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/elemental_ir_emitter.h index d3e2acaabd..7d360fe38c 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.h @@ -216,8 +216,11 @@ class ElementalIrEmitter : public IrBuilderMixin { llvm_ir::ElementGenerator MakePhiloxRngElementGenerator( const HloInstruction* hlo, const HloToElementGeneratorMap& operand_to_generator); + // Converts the raw value generated by a random number generation algorithm // to the distribution requested by the RNG HloInstruction. + // + // Precondition: raw_value has at least as many bits as hlo's element type. 
StatusOr ConvertValueForDistribution( const HloInstruction* hlo, const ElementalIrEmitter::HloToElementGeneratorMap& operand_to_generator, diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index 8def61dc63..e0a0fc4acb 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -2670,12 +2670,25 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { const Literal& high = parent_->GetEvaluatedLiteralFor(random->operand(1)); - std::uniform_real_distribution generator( - low.Get({}), high.Get({})); - + // std::uniform_real_distribution(a, b) can sometimes return a value + // equal to b. Unclear if this is a spec bug or an implementation bug + // or WAI [0] [1] [2]. Anyway for our purposes we want a half-open + // interval, so we have to re-sample if we get `b` out. + // + // [0] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63176 + // [1] https://bugs.llvm.org/show_bug.cgi?id=18767 + // [2] http://open-std.org/JTC1/SC22/WG21/docs/lwg-active.html#2524 + auto low_val = low.Get({}); + auto high_val = high.Get({}); + std::uniform_real_distribution generator(low_val, high_val); TF_RETURN_IF_ERROR( result.Populate([&](absl::Span /*indexes*/) { - return generator(parent_->engine_); + while (true) { + NativeT v = generator(parent_->engine_); + if (v != high_val) { + return v; + } + } })); break; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index f3a50c5936..d7469e1ac9 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -384,6 +384,14 @@ class HloInstruction { // Creates a random number generation instruction that fills a shape with // random numbers from a given distribution. 
+ // + // The parameters to the instruction are interpreted as follows: + // + // - If `distribution` is RNG_UNIFORM, generates a number in range + // [param0, param1). + // + // - If `distribution` is RNG_NORMAL, generates a normally-distributed value + // with mean `param0` and standard deviation `param1`. static std::unique_ptr CreateRng( const Shape& shape, RandomDistribution distribution, absl::Span parameters); -- GitLab From 754b8a18b2a9aca1b53ef734b567b170ea3de00d Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Wed, 13 Feb 2019 18:55:01 -0800 Subject: [PATCH 111/351] Add 2.0 summary ops from TensorBoard to tf.summary via component package This change has the effect of importing scalar(), image() and friends from TensorBoard's summary API into the TF 2.0 tf.summary module. It actually does so by replacing tf.summary entirely, via the component_api_helper() mechanism (in use for tf.estimator), with a replacement package from TensorBoard that re-exports the original tf.summary symbols, thereby creating a fused namespace. The component package approach mitigates circular dependency issues (a risk because the TensorBoard ops in turn depend on TF) because the TF API __init__.py imports the core TF API symbols before it imports components, so if they in turn `import tensorflow` the module has been mostly initialized. It also means that TensorBoard needn't be importable at API generation time, only when TF is actually imported, which avoids issues like #22395. Tracking issue: #25356. 
PiperOrigin-RevId: 233874463 --- tensorflow/api_template.__init__.py | 24 +++--- tensorflow/api_template_v1.__init__.py | 2 +- tensorflow/compat_template.__init__.py | 6 ++ .../api/golden/v2/tensorflow.summary.pbtxt | 20 +++++ tensorflow/tools/api/tests/BUILD | 1 + .../tools/api/tests/api_compatibility_test.py | 37 ++++++++- tensorflow/tools/compatibility/ast_edits.py | 2 +- tensorflow/tools/compatibility/reorders_v2.py | 2 +- .../tools/compatibility/tf_upgrade_v2.py | 76 ++++++++++++++++++- .../tools/compatibility/tf_upgrade_v2_test.py | 37 +++++++++ 10 files changed, 190 insertions(+), 17 deletions(-) diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py index a6eb4755f3..ddcacfcbe2 100644 --- a/tensorflow/api_template.__init__.py +++ b/tensorflow/api_template.__init__.py @@ -26,14 +26,28 @@ import sys as _sys # API IMPORTS PLACEHOLDER +# Make sure directory containing top level submodules is in +# the __path__ so that "from tensorflow.foo import bar" works. +# We're using bitwise, but there's nothing special about that. 
+_API_MODULE = bitwise # pylint: disable=undefined-variable +_current_module = _sys.modules[__name__] +_tf_api_dir = _os.path.dirname(_os.path.dirname(_API_MODULE.__file__)) +if not hasattr(_current_module, '__path__'): + __path__ = [_tf_api_dir] +elif _tf_api_dir not in __path__: + __path__.append(_tf_api_dir) + # pylint: disable=g-bad-import-order from tensorflow.python.tools import component_api_helper as _component_api_helper +_component_api_helper.package_hook( + parent_package_str=__name__, + child_package_str=('tensorboard.summary._tf.summary'), + error_msg="Limited tf.summary API due to missing TensorBoard installation") _component_api_helper.package_hook( parent_package_str=__name__, child_package_str=( 'tensorflow_estimator.python.estimator.api._v2.estimator')) -_current_module = _sys.modules[__name__] if not hasattr(_current_module, 'estimator'): _component_api_helper.package_hook( parent_package_str=__name__, @@ -42,14 +56,6 @@ if not hasattr(_current_module, 'estimator'): _component_api_helper.package_hook( parent_package_str=__name__, child_package_str=('tensorflow.python.keras.api._v2.keras')) -# Make sure directory containing top level submodules is in -# the __path__ so that "from tensorflow.foo import bar" works. -# We're using bitwise, but there's nothing special about that. 
-_tf_api_dir = _os.path.dirname(_os.path.dirname(bitwise.__file__)) # pylint: disable=undefined-variable -if not hasattr(_current_module, '__path__'): - __path__ = [_tf_api_dir] -elif _tf_api_dir not in __path__: - __path__.append(_tf_api_dir) # Enable TF2 behaviors from tensorflow.python.compat import v2_compat as _compat # pylint: disable=g-import-not-at-top diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index eeca8f0d56..5eb25a81b7 100644 --- a/tensorflow/api_template_v1.__init__.py +++ b/tensorflow/api_template_v1.__init__.py @@ -70,7 +70,7 @@ _API_MODULE = app # pylint: disable=undefined-variable # Make sure directory containing top level submodules is in # the __path__ so that "from tensorflow.foo import bar" works. -_tf_api_dir = _os.path.dirname(_os.path.dirname(_API_MODULE.__file__)) # pylint: disable=undefined-variable +_tf_api_dir = _os.path.dirname(_os.path.dirname(_API_MODULE.__file__)) if not hasattr(_current_module, '__path__'): __path__ = [_tf_api_dir] elif _tf_api_dir not in __path__: diff --git a/tensorflow/compat_template.__init__.py b/tensorflow/compat_template.__init__.py index a0c3b0f6fc..2cf68c9cd8 100644 --- a/tensorflow/compat_template.__init__.py +++ b/tensorflow/compat_template.__init__.py @@ -26,6 +26,12 @@ import sys as _sys # API IMPORTS PLACEHOLDER from tensorflow.python.tools import component_api_helper as _component_api_helper +_component_api_helper.package_hook( + parent_package_str=__name__, + child_package_str=('tensorboard.summary._tf.summary'), + error_msg=( + "Limited tf.compat.v2.summary API due to missing TensorBoard " + "installation")) _component_api_helper.package_hook( parent_package_str=__name__, child_package_str=( diff --git a/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt index 335489865b..85edef9d7e 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt +++ 
b/tensorflow/tools/api/golden/v2/tensorflow.summary.pbtxt @@ -4,6 +4,10 @@ tf_module { name: "SummaryWriter" mtype: "" } + member_method { + name: "audio" + argspec: "args=[\'name\', \'data\', \'sample_rate\', \'step\', \'max_outputs\', \'encoding\', \'description\'], varargs=None, keywords=None, defaults=[\'3\', \'None\', \'None\'], " + } member_method { name: "create_file_writer" argspec: "args=[\'logdir\', \'max_queue\', \'flush_millis\', \'filename_suffix\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " @@ -12,6 +16,14 @@ tf_module { name: "flush" argspec: "args=[\'writer\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "histogram" + argspec: "args=[\'name\', \'data\', \'step\', \'buckets\', \'description\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } + member_method { + name: "image" + argspec: "args=[\'name\', \'data\', \'step\', \'max_outputs\', \'description\'], varargs=None, keywords=None, defaults=[\'3\', \'None\'], " + } member_method { name: "import_event" argspec: "args=[\'tensor\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -20,10 +32,18 @@ tf_module { name: "record_if" argspec: "args=[\'condition\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "scalar" + argspec: "args=[\'name\', \'data\', \'step\', \'description\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "summary_scope" argspec: "args=[\'name\', \'default_name\', \'values\'], varargs=None, keywords=None, defaults=[\'summary\', \'None\'], " } + member_method { + name: "text" + argspec: "args=[\'name\', \'data\', \'step\', \'description\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "write" argspec: "args=[\'tag\', \'tensor\', \'step\', \'metadata\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " diff --git 
a/tensorflow/tools/api/tests/BUILD b/tensorflow/tools/api/tests/BUILD index 8764409e4d..1675dfa512 100644 --- a/tensorflow/tools/api/tests/BUILD +++ b/tensorflow/tools/api/tests/BUILD @@ -31,6 +31,7 @@ py_test( "//tensorflow/tools/api/lib:python_object_to_proto_visitor", "//tensorflow/tools/common:public_api", "//tensorflow/tools/common:traverse", + "@six_archive//:six", ], ) diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index 0d3501a45d..dad798c838 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -32,6 +32,7 @@ import os import re import sys +import six import tensorflow as tf from google.protobuf import message @@ -134,6 +135,29 @@ def _FilterNonCoreGoldenFiles(golden_file_list): return filtered_file_list +def _FilterGoldenProtoDict(golden_proto_dict, omit_golden_symbols_map): + """Filter out golden proto dict symbols that should be omitted.""" + if not omit_golden_symbols_map: + return golden_proto_dict + filtered_proto_dict = dict(golden_proto_dict) + for key, symbol_list in six.iteritems(omit_golden_symbols_map): + api_object = api_objects_pb2.TFAPIObject() + api_object.CopyFrom(filtered_proto_dict[key]) + filtered_proto_dict[key] = api_object + module_or_class = None + if api_object.HasField('tf_module'): + module_or_class = api_object.tf_module + elif api_object.HasField('tf_class'): + module_or_class = api_object.tf_class + if module_or_class is not None: + for members in (module_or_class.member, module_or_class.member_method): + filtered_members = [m for m in members if m.name not in symbol_list] + # Two steps because protobuf repeated fields disallow slice assignment. 
+ del members[:] + members.extend(filtered_members) + return filtered_proto_dict + + class ApiCompatibilityTest(test.TestCase): def __init__(self, *args, **kwargs): @@ -270,7 +294,8 @@ class ApiCompatibilityTest(test.TestCase): root, golden_file_pattern, api_version, - additional_private_map=None): + additional_private_map=None, + omit_golden_symbols_map=None): # Extract all API stuff. visitor = python_object_to_proto_visitor.PythonObjectToProtoVisitor() @@ -303,6 +328,8 @@ class ApiCompatibilityTest(test.TestCase): _FileNameToKey(filename): _ReadFileToProto(filename) for filename in golden_file_list } + golden_proto_dict = _FilterGoldenProtoDict(golden_proto_dict, + omit_golden_symbols_map) # Diff them. Do not fail if called with update. # If the test is run to update goldens, only report diffs but do not fail. @@ -346,11 +373,17 @@ class ApiCompatibilityTest(test.TestCase): golden_file_pattern = os.path.join( resource_loader.get_root_dir_with_all_resources(), _KeyToFilePath('*', api_version)) + omit_golden_symbols_map = {} + if FLAGS.only_test_core_api: + # In TF 2.0 these summary symbols are imported from TensorBoard. 
+ omit_golden_symbols_map['tensorflow.summary'] = [ + 'audio', 'histogram', 'image', 'scalar', 'text'] self._checkBackwardsCompatibility( tf.compat.v2, golden_file_pattern, api_version, - additional_private_map={'tf.compat': ['v1', 'v2']}) + additional_private_map={'tf.compat': ['v1', 'v2']}, + omit_golden_symbols_map=omit_golden_symbols_map) if __name__ == '__main__': diff --git a/tensorflow/tools/compatibility/ast_edits.py b/tensorflow/tools/compatibility/ast_edits.py index 0e2424654b..3d421f6704 100644 --- a/tensorflow/tools/compatibility/ast_edits.py +++ b/tensorflow/tools/compatibility/ast_edits.py @@ -286,7 +286,7 @@ class _PastaEditVisitor(ast.NodeVisitor): arg_warnings = self._get_applicable_dict("function_arg_warnings", full_name, name) - for (kwarg, arg), (level, warning) in arg_warnings.items(): + for (kwarg, arg), (level, warning) in sorted(arg_warnings.items()): present, _ = get_arg_value(node, kwarg, arg) if present: warned = True diff --git a/tensorflow/tools/compatibility/reorders_v2.py b/tensorflow/tools/compatibility/reorders_v2.py index e7edf3f724..8acd17d73f 100644 --- a/tensorflow/tools/compatibility/reorders_v2.py +++ b/tensorflow/tools/compatibility/reorders_v2.py @@ -58,7 +58,7 @@ reorders = { 'tf.math.reduce_prod': ['input_tensor', 'axis', 'keepdims', 'name', 'reduction_indices', 'keep_dims'], 'tf.math.reduce_sum': ['input_tensor', 'axis', 'keepdims', 'name', 'reduction_indices', 'keep_dims'], 'tf.multinomial': ['logits', 'num_samples', 'seed', 'name', 'output_dtype'], - 'tf.nn.conv1d': ['value', 'filters', 'stride', 'padding', 'use_cudnn_on_gpu', 'data_format', 'name', 'input'], + 'tf.nn.conv1d': ['value', 'filters', 'stride', 'padding', 'use_cudnn_on_gpu', 'data_format', 'name', 'input', 'dilations'], 'tf.nn.conv2d': ['input', 'filter', 'strides', 'padding', 'use_cudnn_on_gpu', 'data_format', 'dilations', 'name', 'filters'], 'tf.nn.conv2d_backprop_filter': ['input', 'filter_sizes', 'out_backprop', 'strides', 'padding', 'use_cudnn_on_gpu', 
'data_format', 'dilations', 'name'], 'tf.nn.conv2d_backprop_input': ['input_sizes', 'filter', 'out_backprop', 'strides', 'padding', 'use_cudnn_on_gpu', 'data_format', 'dilations', 'name', 'filters'], diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py index 3345434c86..25b320b982 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py @@ -411,6 +411,20 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): "filter": "filters", "use_cudnn_on_gpu": None, }, + "tf.contrib.summary.audio": { + "family": None, + }, + "tf.contrib.summary.histogram": { + "family": None, + }, + "tf.contrib.summary.image": { + "bad_color": None, + "max_images": "max_outputs", + "family": None, + }, + "tf.contrib.summary.scalar": { + "family": None, + }, } # pylint: disable=line-too-long @@ -565,12 +579,20 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): "tf.nn.rnn_cell.RNNCell", "tf.contrib.rnn.LSTMStateTuple": "tf.nn.rnn_cell.LSTMStateTuple", - "tf.contrib.summary.initialize": - "tf.compat.v1.summary.initialize", "tf.contrib.framework.sort": "tf.sort", "tf.contrib.framework.argsort": "tf.argsort", + "tf.contrib.summary.audio": + "tf.compat.v2.summary.audio", + "tf.contrib.summary.histogram": + "tf.compat.v2.summary.histogram", + "tf.contrib.summary.image": + "tf.compat.v2.summary.image", + "tf.contrib.summary.initialize": + "tf.compat.v1.summary.initialize", + "tf.contrib.summary.scalar": + "tf.compat.v2.summary.scalar", "tf.count_nonzero": "tf.math.count_nonzero", "tf.manip.batch_to_space_nd": @@ -834,10 +856,24 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): "tf.nn.max_pool", } + # Manual mapping of function names to be reordered to their list of argument + # names, in order. Only use this if argument names cannot be autodetected, + # e.g. if the functions are in contrib. 
+ self.manual_function_reorders = { + "tf.contrib.summary.audio": [ + "name", "tensor", "sample_rate", "max_outputs", "family", "step"], + "tf.contrib.summary.histogram": [ + "name", "tensor", "family", "step"], + "tf.contrib.summary.image": [ + "name", "tensor", "bad_color", "max_images", "family", "step"], + "tf.contrib.summary.scalar": [ + "name", "tensor", "family", "step"], + } # Functions that were reordered should be changed to the new keyword args # for safety, if positional arguments are used. If you have reversed the # positional arguments yourself, this could do the wrong thing. - self.function_reorders = reorders_v2.reorders + self.function_reorders = dict(reorders_v2.reorders) + self.function_reorders.update(self.manual_function_reorders) contrib_warning = ( ast_edits.ERROR, @@ -1266,6 +1302,40 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): "tf.cond no longer takes 'strict' argument, it behaves as " "if was set to True.") }, + "tf.contrib.summary.audio": { + ("family", 4): ( + ast_edits.WARNING, + "tf.contrib.summary.* functions no longer take the 'family' " + "argument; instead name scoping should be used. This call site " + "specifies a family argument so it cannot be converted safely.") + }, + "tf.contrib.summary.histogram": { + ("family", 2): ( + ast_edits.WARNING, + "tf.contrib.summary.* functions no longer take the 'family' " + "argument; instead name scoping should be used. This call site " + "specifies a family argument so it cannot be converted safely.") + }, + "tf.contrib.summary.image": { + ("bad_color", 2): ( + ast_edits.WARNING, + "tf.contrib.summary.image no longer takes the 'bad_color' " + "argument; caller must now preprocess if needed. This call " + "site specifies a bad_color argument so it cannot be converted " + "safely."), + ("family", 4): ( + ast_edits.WARNING, + "tf.contrib.summary.* functions no longer take the 'family' " + "argument; instead name scoping should be used. 
This call site " + "specifies a family argument so it cannot be converted safely.") + }, + "tf.contrib.summary.scalar": { + ("family", 2): ( + ast_edits.WARNING, + "tf.contrib.summary.* functions no longer take the 'family' " + "argument; instead name scoping should be used. This call site " + "specifies a family argument so it cannot be converted safely.") + }, } # Specially handled functions diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py index db05f71e9c..dc03f37a80 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py @@ -343,6 +343,8 @@ class TestUpgrade(test_util.TensorFlowTestCase): tf_upgrade_v2.TFAPIChangeSpec().reordered_function_names) function_reorders = ( tf_upgrade_v2.TFAPIChangeSpec().function_reorders) + manual_function_reorders = ( + tf_upgrade_v2.TFAPIChangeSpec().manual_function_reorders) added_names_message = """Some function names in self.reordered_function_names are not in reorders_v2.py. @@ -362,6 +364,8 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map # function_reorders should contain reordered_function_names # and their TensorFlow V1 aliases. 
for name in function_reorders: + if name in manual_function_reorders: + continue # get other names for this function attr = get_symbol_for_name(tf.compat.v1, name) _, attr = tf_decorator.unwrap(attr) @@ -1228,6 +1232,39 @@ def _log_prob(self, x): _, _, _, new_text = self._upgrade(text) self.assertEqual(expected_text, new_text) + def test_contrib_summary_audio(self): + text = "tf.contrib.summary.audio('foo', myval, 44100, 3, 'fam', 42)" + expected = ("tf.compat.v2.summary.audio(name='foo', tensor=myval, " + "sample_rate=44100, max_outputs=3, step=42)") + _, _, errors, new_text = self._upgrade(text) + self.assertEqual(expected, new_text) + self.assertIn("'family' argument", errors[0]) + + def test_contrib_summary_histogram(self): + text = "tf.contrib.summary.histogram('foo', myval, 'fam', 42)" + expected = ("tf.compat.v2.summary.histogram(name='foo', tensor=myval, " + "step=42)") + _, _, errors, new_text = self._upgrade(text) + self.assertEqual(expected, new_text) + self.assertIn("'family' argument", errors[0]) + + def test_contrib_summary_image(self): + text = "tf.contrib.summary.image('foo', myval, red, 3, 'fam', 42)" + expected = ("tf.compat.v2.summary.image(name='foo', tensor=myval, " + "max_outputs=3, step=42)") + _, _, errors, new_text = self._upgrade(text) + self.assertEqual(expected, new_text) + self.assertIn("'bad_color' argument", errors[0]) + self.assertIn("'family' argument", errors[1]) + + def test_contrib_summary_scalar(self): + text = "tf.contrib.summary.scalar('foo', myval, 'fam', 42)" + expected = ("tf.compat.v2.summary.scalar(name='foo', tensor=myval, " + "step=42)") + _, _, errors, new_text = self._upgrade(text) + self.assertEqual(expected, new_text) + self.assertIn("'family' argument", errors[0]) + class TestUpgradeFiles(test_util.TensorFlowTestCase): -- GitLab From 23806cfc2bb8093e2a95dc71b06e7d0f2c0a7fe8 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 13 Feb 2019 18:57:27 -0800 Subject: [PATCH 112/351] Allows Keras symbolic tensors to work with tf.identity PiperOrigin-RevId: 233874689 --- .../python/keras/layers/tensorflow_op_layer_test.py | 9 +++++++++ tensorflow/python/ops/array_ops.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py index 77496ba4a6..4ffb260b17 100644 --- a/tensorflow/python/keras/layers/tensorflow_op_layer_test.py +++ b/tensorflow/python/keras/layers/tensorflow_op_layer_test.py @@ -41,6 +41,14 @@ def _single_op_at_end(): return inputs, outputs +def _single_identity_op_at_end(): + inputs = keras.Input(shape=(10,)) + x = keras.layers.Dense(10)(inputs) + outputs = array_ops.identity(x) + assert 'Identity' in outputs.name + return inputs, outputs + + def _multiple_ops_at_end(): inputs = keras.Input(shape=(10,)) x = keras.layers.Dense(10)(inputs) @@ -136,6 +144,7 @@ class AutoLambdaTest(keras_parameterized.TestCase): @parameterized.named_parameters( ('single_op_at_end', _single_op_at_end), + ('single_identity_op_at_end', _single_identity_op_at_end), ('multiple_ops_at_end', _multiple_ops_at_end), ('single_op_in_middle', _single_op_in_middle), ('multiple_ops_in_middle', _multiple_ops_in_middle), diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 054f21cfe9..977467d222 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -67,7 +67,7 @@ def identity(input, name=None): # pylint: disable=redefined-builtin Returns: A `Tensor`. Has the same type as `input`. """ - if context.executing_eagerly(): + if context.executing_eagerly() and not hasattr(input, "graph"): input = ops.convert_to_tensor(input) in_device = input.device # TODO(ashankar): Does 'identity' need to invoke execution callbacks? 
-- GitLab From 269ec76458f44da3407f272adaa62c5e0d32403b Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 13 Feb 2019 19:07:53 -0800 Subject: [PATCH 113/351] [XLA] Use a simpler construction for FloorDiv of floats. XLA has a floor function, so FloorDiv is simply Floor(Div(x, y)) instead of the complex construction here (which I presume must have predated the XLA Floor function?). PiperOrigin-RevId: 233876144 --- tensorflow/compiler/tf2xla/kernels/binary_ops.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc index ad6b334326..66446106d3 100644 --- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc @@ -79,7 +79,10 @@ static xla::XlaOp DivNoNanImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, XLA_MAKE_BINARY(DivNoNan, DivNoNanImpl(b, input_type(0), lhs, rhs, broadcast_helper)); -// Implementation of FloorDiv. Pseudo-code: +// Implementation of FloorDiv. +// +// For floating-point values, simply returns floor(x / y). 
For integers, does: +// // if ((x < 0) != (y < 0)) { // T abs_x = std::abs(x); // T abs_y = std::abs(y); @@ -90,6 +93,9 @@ XLA_MAKE_BINARY(DivNoNan, static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, xla::XlaOp y, const BCast& broadcast_helper) { std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper); + if (DataTypeIsFloating(dtype)) { + return xla::Floor(xla::Div(x, y)); + } if (DataTypeIsUnsigned(dtype)) { return xla::Div(x, y); } @@ -99,11 +105,7 @@ static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, auto abs_x = xla::Abs(x); auto abs_y = xla::Abs(y); auto t = xla::Neg(xla::Sub(xla::Add(abs_x, abs_y), one)); - auto result = xla::Select(different_sign, xla::Div(t, abs_y), xla::Div(x, y)); - if (DataTypeIsFloating(dtype)) { - result = xla::Floor(result); - } - return result; + return xla::Select(different_sign, xla::Div(t, abs_y), xla::Div(x, y)); } XLA_MAKE_BINARY(FloorDiv, FloorDivImpl(b, input_type(0), lhs, rhs, broadcast_helper)); -- GitLab From 33f56f0290a169f5bb5c0fcfad96ffbfeb9ee211 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 13 Feb 2019 20:03:58 -0800 Subject: [PATCH 114/351] Do not require element_shape as a compile-time constant in xla TensorListReserve op. 
PiperOrigin-RevId: 233881299 --- .../compiler/tests/tensor_list_ops_test.py | 32 ++++- .../tf2xla/kernels/tensor_list_ops.cc | 111 ++++++++++++++---- tensorflow/compiler/tf2xla/xla_op_kernel.cc | 21 ++++ tensorflow/compiler/tf2xla/xla_op_kernel.h | 4 + 4 files changed, 144 insertions(+), 24 deletions(-) diff --git a/tensorflow/compiler/tests/tensor_list_ops_test.py b/tensorflow/compiler/tests/tensor_list_ops_test.py index 47e0f384a4..a380715301 100644 --- a/tensorflow/compiler/tests/tensor_list_ops_test.py +++ b/tensorflow/compiler/tests/tensor_list_ops_test.py @@ -102,7 +102,7 @@ class ListOpsTest(xla_test.XLATestCase): _, e = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32) with self.assertRaisesRegexp(errors.InvalidArgumentError, "Set the max number of elements"): - self.assertEqual(sess.run(e), 1.0 * np.ones((7, 15))) + self.assertAllEqual(sess.run(e), 1.0 * np.ones((7, 15))) def testEmptyTensorListMax(self): with self.cached_session() as sess, self.test_scope(): @@ -136,6 +136,17 @@ class ListOpsTest(xla_test.XLATestCase): t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32) self.assertAllEqual(t, [3.0, 2.0]) + def testSetDoesNotUpdatePushIndex(self): + with self.cached_session(), self.test_scope(): + l = list_ops.empty_tensor_list( + element_shape=[], element_dtype=dtypes.float32, max_num_elements=2) + # SetItem should not change the push index. + l = list_ops.tensor_list_set_item(l, 1, 3.) + l = list_ops.tensor_list_push_back(l, 5.) + l = list_ops.tensor_list_push_back(l, 7.) 
+ t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32) + self.assertAllEqual(t, [5., 7.]) + def testGetSetReserved(self): with self.cached_session(), self.test_scope(): l = list_ops.tensor_list_reserve( @@ -146,6 +157,25 @@ class ListOpsTest(xla_test.XLATestCase): t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32) self.assertAllEqual(t, [3.0, 0.0]) + def testSetStackReservedUnknownElementShape(self): + with self.cached_session(), self.test_scope(): + l = list_ops.tensor_list_reserve( + element_dtype=dtypes.float32, element_shape=None, num_elements=2) + l = list_ops.tensor_list_set_item(l, 0, [3.0, 4.0]) + t = list_ops.tensor_list_stack(l, element_dtype=dtypes.float32) + self.assertAllEqual(t, [[3.0, 4.0], [0., 0.]]) + + def testPushInEmptyListWithUnknownElementShape(self): + with self.cached_session(), self.test_scope(): + l = list_ops.empty_tensor_list( + element_dtype=dtypes.float32, element_shape=None, max_num_elements=2) + l = list_ops.tensor_list_push_back(l, [3.0, 4.0]) + # Pushing an element with a different shape should raise an error. + with self.assertRaisesRegexp(errors.InvalidArgumentError, "Shape"): + l = list_ops.tensor_list_push_back(l, 5.) + self.evaluate( + list_ops.tensor_list_stack(l, element_dtype=dtypes.float32)) + def testGetSetReservedNonScalar(self): with self.cached_session() as sess, self.test_scope(): l = list_ops.tensor_list_reserve( diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc index 6502001228..8958a48bc7 100644 --- a/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/tensor_list_ops.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" +#include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/core/framework/bounds_check.h" #include "tensorflow/core/framework/op_kernel.h" @@ -35,6 +36,7 @@ limitations under the License. #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/kernels/concat_lib.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" @@ -69,6 +71,43 @@ class TensorListLengthOp : public XlaOpKernel { REGISTER_XLA_OP(Name("TensorListLength"), TensorListLengthOp); +// Creates an empty list with size (leading_dim, *element_shape) if +// element_shape is known at compile time. Otherwise creates one with size +// (leading_dim, 0) which gets initialized later in `GetInitializedList`. 
+Status CreateZerosList(XlaOpKernelContext* ctx, int element_shape_index, + int64 leading_dim, DataType dtype, xla::XlaOp* list) { + TensorShape list_shape; + list_shape.AddDim(leading_dim); + xla::XlaOp element_shape_handle = ctx->Input(element_shape_index); + TF_ASSIGN_OR_RETURN( + bool is_element_shape_compile_time_const, + element_shape_handle.builder()->IsConstant(element_shape_handle)); + PartialTensorShape partial_element_shape; + if (is_element_shape_compile_time_const) { + TF_RETURN_IF_ERROR(ctx->ConstantInputAsPartialShape( + element_shape_index, &partial_element_shape)); + } + if (is_element_shape_compile_time_const && + partial_element_shape.IsFullyDefined()) { + TensorShape element_shape; + partial_element_shape.AsTensorShape(&element_shape); + list_shape.AppendShape(element_shape); + } else { + // If element_shape is not a compile time constant or if it is not fully + // defined we will have to wait for the first write call to fully allocate + // the array. + // TODO(srbs): We are using element_shape of [0] as a proxy to denote an + // uninitialized list. A better implementation may be to represent the + // list as a 3-tuple containining an explicit "initialized" flag. However, + // we would still need to create a dummy tensor for the first tuple + // element. 
+ list_shape.AddDim(0); + } + *list = xla::Broadcast(XlaHelpers::Zero(ctx->builder(), dtype), + list_shape.dim_sizes()); + return Status::OK(); +} + class TensorListReserveOp : public XlaOpKernel { public: explicit TensorListReserveOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { @@ -76,20 +115,15 @@ class TensorListReserveOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - TensorShape element_shape; - OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &element_shape)); int64 num_elements; OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &num_elements)); - TensorShape tensor_shape; - tensor_shape.AddDim(num_elements); - tensor_shape.AppendShape(element_shape); + xla::XlaOp list; + OP_REQUIRES_OK(ctx, CreateZerosList(ctx, 0, num_elements, dtype_, &list)); xla::XlaBuilder* b = ctx->builder(); ctx->SetTensorListOutput( - 0, xla::Tuple(b, {xla::Broadcast(XlaHelpers::Zero(b, dtype_), - tensor_shape.dim_sizes()), - xla::ConstantR0(b, num_elements)})); + 0, xla::Tuple(b, {list, xla::ConstantR0(b, num_elements)})); } private: @@ -110,8 +144,6 @@ class EmptyTensorListOp : public XlaOpKernel { } void Compile(XlaOpKernelContext* ctx) override { - TensorShape element_shape; - OP_REQUIRES_OK(ctx, ctx->ConstantInputAsShape(0, &element_shape)); int64 max_num_elements; OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(1, &max_num_elements)); OP_REQUIRES( @@ -119,15 +151,13 @@ class EmptyTensorListOp : public XlaOpKernel { errors::InvalidArgument("XLA compilation requires a fixed tensor list " "size. 
Set the max number of elements.")); - TensorShape tensor_shape; - tensor_shape.AddDim(max_num_elements); - tensor_shape.AppendShape(element_shape); + xla::XlaOp list; + OP_REQUIRES_OK(ctx, + CreateZerosList(ctx, 0, max_num_elements, dtype_, &list)); xla::XlaBuilder* b = ctx->builder(); ctx->SetTensorListOutput( - 0, xla::Tuple(b, {xla::Broadcast(XlaHelpers::Zero(b, dtype_), - tensor_shape.dim_sizes()), - xla::ConstantR0(b, 0)})); + 0, xla::Tuple(b, {list, xla::ConstantR0(b, 0)})); } private: @@ -274,6 +304,36 @@ REGISTER_XLA_OP( Name("TensorListFromTensor").CompileTimeConstantInput("element_shape"), TensorListFromTensorOp); +// Returns the 0'th element of `tuple` containing the list tensor if it has been +// initialized already else creates one lazily. This allows lazy initialization +// of the list on the first call to SetItem or PushBack. +Status GetInitializedList(XlaOpKernelContext* ctx, const xla::XlaOp& tuple, + const TensorShape& element_shape, DataType dtype, + xla::XlaOp* list) { + *list = xla::GetTupleElement(tuple, 0); + TensorShape list_shape; + TF_RETURN_IF_ERROR(GetTensorListShape(ctx->builder(), tuple, &list_shape)); + int64 leading_dim = list_shape.dim_size(0); + TensorShape list_element_shape = list_shape; + list_element_shape.RemoveDim(0); + // This checks for the lazy initialization contract set by CreateEmptyList. + // In TensorListReserve if the element_shape is not known at compile time, + // it creates a list with shape [leading_dim, 0]. + if (element_shape != list_element_shape) { + if (list_element_shape.num_elements() != 0) { + return errors::InvalidArgument( + "Invalid shape of value in TensorListSetItem. 
Expected: ", + list_element_shape.DebugString(), + " Actual: ", element_shape.DebugString()); + } + list_shape = element_shape; + list_shape.InsertDim(0, leading_dim); + *list = xla::Broadcast(XlaHelpers::Zero(ctx->builder(), dtype), + list_shape.dim_sizes()); + } + return Status::OK(); +} + class TensorListSetItemOp : public XlaOpKernel { public: explicit TensorListSetItemOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { @@ -285,7 +345,9 @@ class TensorListSetItemOp : public XlaOpKernel { xla::XlaOp tl = ctx->Input(0); TensorShape elem_shape = ctx->InputShape(2); - xla::XlaOp ta = xla::GetTupleElement(tl, 0); + xla::XlaOp list; + OP_REQUIRES_OK(ctx, GetInitializedList(ctx, tl, elem_shape, dtype_, &list)); + xla::XlaOp index = ctx->Input(1); xla::XlaOp value = ctx->Input(2); @@ -299,8 +361,8 @@ class TensorListSetItemOp : public XlaOpKernel { auto update = xla::Reshape(value, slice_shape.dim_sizes()); ctx->SetTensorListOutput( - 0, xla::Tuple(b, {xla::DynamicUpdateSlice(ta, update, start_indices), - index + xla::ConstantR0(b, 1)})); + 0, xla::Tuple(b, {xla::DynamicUpdateSlice(list, update, start_indices), + xla::GetTupleElement(tl, 1)})); } private: @@ -319,11 +381,14 @@ class TensorListPushBackOp : public XlaOpKernel { void Compile(XlaOpKernelContext* ctx) override { xla::XlaBuilder* b = ctx->builder(); - xla::XlaOp tl = ctx->Input(0); + xla::XlaOp list_tuple = ctx->Input(0); TensorShape elem_shape = ctx->InputShape(1); - xla::XlaOp ta = xla::GetTupleElement(tl, 0); - xla::XlaOp index = xla::GetTupleElement(tl, 1); + xla::XlaOp list; + OP_REQUIRES_OK( + ctx, GetInitializedList(ctx, list_tuple, elem_shape, dtype_, &list)); + + xla::XlaOp index = xla::GetTupleElement(list_tuple, 1); xla::XlaOp value = ctx->Input(1); // start_indices of the DynamicUpdateSlice are [index, 0, 0, ..., 0]. 
@@ -336,7 +401,7 @@ class TensorListPushBackOp : public XlaOpKernel { auto update = xla::Reshape(value, slice_shape.dim_sizes()); ctx->SetTensorListOutput( - 0, xla::Tuple(b, {xla::DynamicUpdateSlice(ta, update, start_indices), + 0, xla::Tuple(b, {xla::DynamicUpdateSlice(list, update, start_indices), index + xla::ConstantR0(b, 1)})); } diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.cc b/tensorflow/compiler/tf2xla/xla_op_kernel.cc index 0c80b26dff..ee11f3a3de 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.cc +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.cc @@ -319,6 +319,27 @@ Status XlaOpKernelContext::ConstantInputAsShape(int index, TensorShape* shape) { return Status::OK(); } +Status XlaOpKernelContext::ConstantInputAsPartialShape( + int index, PartialTensorShape* shape) { + xla::Literal literal; + TF_RETURN_IF_ERROR(ConstantInput(index, &literal)); + // If `literal` is a scalar it's value must be -1. + if (literal.shape().rank() == 0) { + int64 shape_val; + TF_RETURN_IF_ERROR(LiteralToInt64Scalar(literal, &shape_val)); + if (shape_val != -1) { + return errors::InvalidArgument( + "Cannot convert value to PartialTensorShape: ", shape_val); + } + *shape = PartialTensorShape(); // Shape with unknown rank. + return Status::OK(); + } + std::vector dims; + TF_RETURN_IF_ERROR(LiteralToInt64Vector(literal, &dims)); + *shape = PartialTensorShape(dims); + return Status::OK(); +} + Status XlaOpKernelContext::InputList(absl::string_view name, std::vector* handles, std::vector* shapes) { diff --git a/tensorflow/compiler/tf2xla/xla_op_kernel.h b/tensorflow/compiler/tf2xla/xla_op_kernel.h index b3cef40db8..cc2d5e8de3 100644 --- a/tensorflow/compiler/tf2xla/xla_op_kernel.h +++ b/tensorflow/compiler/tf2xla/xla_op_kernel.h @@ -138,6 +138,10 @@ class XlaOpKernelContext { // Converts a constant 1D int32 or int64 tensor into a TensorShape. 
Status ConstantInputAsShape(int index, TensorShape* shape); + // Converts a constant 1D int32 or int64 tensor, or a scalar with value -1 + // into a PartialTensorShape. + Status ConstantInputAsPartialShape(int index, PartialTensorShape* shape); + // Returns the named list-valued immutable input in "list", as // defined in the OpDef. If the named output is not list-valued, // returns a one-element list. -- GitLab From d075fc2608d65dcc0defd5ceeb6a470d00c80e8d Mon Sep 17 00:00:00 2001 From: Siju Date: Thu, 14 Feb 2019 10:12:45 +0530 Subject: [PATCH 115/351] Update hlo_runner.h bounary -> boundary --- tensorflow/compiler/xla/service/hlo_runner.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index fb897aa959..098989cd4c 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -60,7 +60,7 @@ class HloRunner { // The number of times the infeed literal should be fed to the HLO module. // For a clean exit, this should match the iterations-per-loop parameter // used when generating the HLO module proto (that is usually the main - // while bounary counter). A value higher then iterations-per-loop would + // while boundary counter). A value higher then iterations-per-loop would // lead to infeed threads feeding to a gone computation, while a lower // value would trigger a stuck ExecuteReplicated() call (the computation // will be trying to infeed data which will never come). 
-- GitLab From 5c1b7923a4d2c934a4618ecfc511d6e7cd53bee7 Mon Sep 17 00:00:00 2001 From: Siju Date: Thu, 14 Feb 2019 10:14:11 +0530 Subject: [PATCH 116/351] Update tensor_handle.h compuatation -> computation --- tensorflow/core/common_runtime/eager/tensor_handle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/eager/tensor_handle.h b/tensorflow/core/common_runtime/eager/tensor_handle.h index e4ccb11dba..ac99fdbe29 100644 --- a/tensorflow/core/common_runtime/eager/tensor_handle.h +++ b/tensorflow/core/common_runtime/eager/tensor_handle.h @@ -133,7 +133,7 @@ class TensorHandle : public core::RefCounted { private: // If the contents of the Tensor pointed to by this handle is yet to be - // computed by a EagerNode, this function will block till that compuatation is + // computed by a EagerNode, this function will block till that computation is // done and the handle is "ready". Status WaitReady(); Status WaitForNode(uint64 node_id, bool return_if_is_ready); -- GitLab From 8d4df0e4e9ea374f7efcadc77217591e4d0b03a8 Mon Sep 17 00:00:00 2001 From: Siju Date: Thu, 14 Feb 2019 10:16:12 +0530 Subject: [PATCH 117/351] Update hlo_instruction.h conssits -> consists --- tensorflow/compiler/xla/service/hlo_instruction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index d7469e1ac9..38141aa6d4 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -501,7 +501,7 @@ class HloInstruction { // Data is sent/received according to the (source_replica_id, // target_replica_id) pairs in `source_target_pairs`. If a replica id is not a // target_replica_id in any pair, the output on that replica is a tensor - // conssits of 0(s) in `shape`. + // consists of 0(s) in `shape`. 
static std::unique_ptr CreateCollectivePermute( const Shape& shape, HloInstruction* operand, const std::vector>& source_target_pairs); -- GitLab From 85f16e8729e0708e593d44e2bd470a306b69fa2b Mon Sep 17 00:00:00 2001 From: Siju Date: Thu, 14 Feb 2019 10:18:17 +0530 Subject: [PATCH 118/351] Update call_graph.h contex -> context --- tensorflow/compiler/xla/service/call_graph.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/call_graph.h b/tensorflow/compiler/xla/service/call_graph.h index c02ffda575..57a636fd74 100644 --- a/tensorflow/compiler/xla/service/call_graph.h +++ b/tensorflow/compiler/xla/service/call_graph.h @@ -30,7 +30,7 @@ namespace xla { // The context in which a computation is called by another computation. enum class CallContext { - // In a parallel contex the computation is applied to each element of the + // In a parallel context the computation is applied to each element of the // array argument(s). kMap and kReduce instructions call computations in // parallel context. kParallel, -- GitLab From 5f8540852f04656de9fdb87a0dac98c1f90f3dc1 Mon Sep 17 00:00:00 2001 From: Siju Date: Thu, 14 Feb 2019 10:19:15 +0530 Subject: [PATCH 119/351] Update gpu_fusible.h conumser -> consumer --- tensorflow/compiler/xla/service/gpu/gpu_fusible.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_fusible.h b/tensorflow/compiler/xla/service/gpu/gpu_fusible.h index e9d7ba1c4c..8c075c671d 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_fusible.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_fusible.h @@ -48,7 +48,7 @@ bool IsInputFusibleReduction(const HloInstruction& instr); // Whether instruction shapes are compatible for multi-output fusion, i.e. // whether the emitters support lowering the resulting fusion. 
-// This function works for both, sibling and producer-conumser multi-output +// This function works for both, sibling and producer-consumer multi-output // fusion. // So far, multi-output fusion is supported for loop fusions and reduce // input fusions only. It is up to the caller to ensure the instructions -- GitLab From 71a9efdc66a4a5de68789ce11328bd295a27930c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 13 Feb 2019 20:49:39 -0800 Subject: [PATCH 120/351] Use tpu.rewrite instead of tpu.rewrite_for_inference with PartitionedCallOp in TPUEstimator.py. PiperOrigin-RevId: 233885147 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 988591499f..afe0a04d3b 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1363,7 +1363,7 @@ def call_computation(computation, # TPU core with every `Session.run()` call. Note that the entire inference # graph executes on a single core, and that invocations of this graph # will round-robin among the cores attached to a host.
- @function.Defun() + @function.Defun(capture_resource_var_by_value=False) def tpu_subgraph(): return computation() @@ -2464,8 +2464,14 @@ class TPUEstimator(estimator_lib.Estimator): device_assignment = ctx.device_assignment else: device_assignment = None - tensors_on_cpu = tpu.rewrite_for_inference( - tpu_computation, device_assignment=device_assignment) + + if self._experimental_exported_model_uses_all_cores: + tensors_on_cpu = tpu.rewrite( + tpu_computation, device_assignment=device_assignment) + else: + tensors_on_cpu = tpu.rewrite_for_inference( + tpu_computation, device_assignment=device_assignment) + (estimator_spec, export_outputs_dict, export_outputs_list, predictions_dict) = ( tpu_capture.get()) -- GitLab From dd5e07c4c3d36d6447f0392fe83b51bfc320b1d1 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Wed, 13 Feb 2019 21:37:37 -0800 Subject: [PATCH 121/351] Remove constructor for ConvertSlice's TestParams --- .../compiler/tf2tensorrt/convert/convert_nodes_test.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 43e537e3a1..1fb947248b 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -2972,16 +2972,6 @@ TEST_F(OpConverterTest, ConvertSlice) { } struct TestParams { - TestParams(const std::vector& input_dims, - const std::vector& begin, const std::vector& size, - const std::vector& expected_output_dims, - const std::vector& expected_output) - : input_dims(input_dims), - begin(begin), - size(size), - expected_output_dims(expected_output_dims), - expected_output(expected_output) {} - std::vector input_dims; std::vector begin; std::vector size; -- GitLab From 368674dfe253eef39537fcabbb675f12be47ab05 Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Wed, 13 Feb 2019 22:05:52 -0800 Subject: [PATCH 122/351] PR #25565: TFTRT: 
Support GatherV2 op Please approve this CL. It will be submitted automatically, and its GitHub pull request will be marked as merged. Imported from GitHub PR #25565 * Add support for GatherV2 Op. * Add unit tests * Add CheckAxis helper method (need to use in other functions, will issue separate PR for this) https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/gather-v2 The "scalar indices" case is currently unsupported because we don't allow scalar tensors, but they are now supported in TRT 5.0+ or maybe earlier, need to double check (TRT calls these 0-D tensors). We will have to update our code to allow these tensors in order to allow this case to convert. Will add support for constant (weight) indices with automatic weight -> tensor helper soon. Copybara import of the project: - bad24cee6eb7888912d8a582ae75981ecc893dce Support GatherV2 op. Add unit tests. Add CheckAxis helper... by Trevor Morris - bb1597436974e6c840d2da3af30292877f51e062 Fix formatting and add TODO for datatypes by Trevor Morris - 1899c671d97b7efcf1adb87a3f32158fd4603fcd Make templated test function for Gather to test all appli... by Trevor Morris - 48e7536c9a81415ac7487c68807e25d2c92a7a2c Improve ConvertAxis. Change TRT_ShapedWeights::ToVector -... by Trevor Morris - d5c684610c42b8c7e02112d270fd12afed2ed484 Remove unnecessary std::to_string by Trevor Morris - 6a34e557898ef1b71b3c78c18a929ffb7d11c2d4 Use absl::Span by Trevor Morris - ff7a35e6e131cdca7082af1b274b406545ece6fa Remove to_string. Remove unused check_bounds arg by Trevor Morris - 90d8c78e62b39eedadb9f3c8a6367fda6cf9120e Merge ff7a35e6e131cdca7082af1b274b406545ece6fa into 39a64... 
by Trevor Morris COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/25565 from trevor-m:tmorris_tftrt_gather ff7a35e6e131cdca7082af1b274b406545ece6fa PiperOrigin-RevId: 233892061 --- .../tf2tensorrt/convert/convert_graph.cc | 1 + .../tf2tensorrt/convert/convert_nodes.cc | 50 ++++++++ .../tf2tensorrt/convert/convert_nodes.h | 5 + .../tf2tensorrt/convert/convert_nodes_test.cc | 120 ++++++++++++++++++ 4 files changed, 176 insertions(+) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc index beb87b6c24..0b0cb0db8e 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc @@ -105,6 +105,7 @@ Status TrtCandidateSelector::IsTensorRTCandidate(const tensorflow::Node* node) { "ExpandDims", "FusedBatchNorm", "FusedBatchNormV2", + "GatherV2", "Identity", "LeakyRelu", "Log", diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index cce937a253..002526c04b 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -378,6 +378,32 @@ tensorflow::Status CreateBroadcastableScalarConstant( return Status::OK(); } +// Convert an axis from TF format to TRT format while validating. TF format +// includes the batch dimension, while TRT does not. TF can also use negative +// indices. +// TODO(tmorris): Use this method in more ops. +tensorflow::Status ConvertAxis(int tf_axis, int trt_nb_dims, + absl::string_view node_name, int* trt_axis) { + const int tf_nb_dims = trt_nb_dims + 1; + // Check bounds. + if (tf_axis < -tf_nb_dims || tf_axis >= tf_nb_dims) { + return tensorflow::errors::InvalidArgument( + "Axis value of ", tf_axis, " is out of bounds, must be in range [", + -tf_nb_dims, ", ", tf_nb_dims, "), at ", node_name); + } + // Make negative axis positive. 
+ if (tf_axis < 0) tf_axis += tf_nb_dims; + // Don't allow axis to be the batch dimension. + if (tf_axis == 0) { + return tensorflow::errors::Unimplemented( + "TensorRT does not allow manipulation of the batch dimension, at ", + node_name); + } + // Remove batch dimension. + *trt_axis = tf_axis - 1; + return Status::OK(); +} + inline bool DimsEqual(const nvinfer1::Dims& dim_l, const nvinfer1::Dims& dim_r) { if (dim_l.nbDims != dim_r.nbDims) { @@ -3412,6 +3438,29 @@ tensorflow::Status ConvertFusedBatchNorm(OpConverterParams* params) { return tensorflow::Status::OK(); } +tensorflow::Status ConvertGather(OpConverterParams* params) { + const auto& inputs = params->inputs; + const auto& node_def = params->node_def; + TF_RETURN_IF_ERROR(CheckInputsWeights( + *params, {{"params", false}, {"indices", false}, {"axis", true}})); + absl::Span axis = inputs.at(2).weights().GetSpan(); + if (axis.size() != 1) { + return tensorflow::errors::InvalidArgument( + "Axis for GatherV2 must be a scalar, at ", node_def.name()); + } + int trt_axis = 0; + TF_RETURN_IF_ERROR(ConvertAxis(axis[0], inputs.at(0).GetTrtDims().nbDims, + node_def.name(), &trt_axis)); + if (params->validation_only) return Status::OK(); + + nvinfer1::IGatherLayer* layer = params->converter->network()->addGather( + *const_cast(inputs.at(0).tensor()), + *const_cast(inputs.at(1).tensor()), trt_axis); + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + params->outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0))); + return Status::OK(); +} + tensorflow::Status ConvertMatMulHelper(OpConverterParams* params, TRT_TensorOrWeights tensor_input, TRT_ShapedWeights weights_raw, @@ -3642,6 +3691,7 @@ static void RegisterValidatableOpConverters( (*registration)["Conv2DBackpropInput"] = ConvertConv2DBackpropInput; (*registration)["DepthwiseConv2dNative"] = ConvertConv2DDepthwise; (*registration)["ExpandDims"] = ConvertExpandDims; + (*registration)["GatherV2"] = ConvertGather; (*registration)["LeakyRelu"] = 
ConvertLeakyRelu; (*registration)["MatMul"] = ConvertMatMul; (*registration)["Pad"] = ConvertPad; diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h index d1e30eb848..cbba01ba57 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h @@ -190,6 +190,11 @@ class TRT_ShapedWeights { string DebugString() const; + template + absl::Span GetSpan() const { + return absl::Span(tensor_.flat().data(), count()); + } + // TODO(aaroey): make these private. nvinfer1::Dims shape_; // Note: shape.type[] is not used. tensorflow::DataType type_; diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index bb1341ada3..bb6fc7f0e4 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -3129,6 +3129,126 @@ TEST_F(OpConverterTest, ConvertTopK) { } } +template +void TestConvertGather(OpConverterTest* test) { + typedef typename EnumToDataType::Type CType; + + // Get the NodeDef for GatherV2. + Scope s = Scope::NewRootScope(); + auto params = ops::Placeholder(s.WithOpName("params"), dtype); + auto indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32); + auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32); + auto gather = ops::GatherV2(s.WithOpName("my_gather"), params, indices, axis); + const NodeDef& node_def = gather.operation.node()->def(); + + struct TestParams { + std::vector params_dims; + std::vector indices_dims; + std::vector indices; + int axis; + std::vector expected_output_dims; + std::vector expected_output; + }; + + // Input is the same {1, 2, 3, 4, 5, 6} for all cases. + const int kGatherOKCases = 5; + TestParams ok_params[kGatherOKCases] = { + // Vector indices (output is rank(params)). 
+ TestParams{{1, 2, 3}, {1}, {0}, 3, {1, 2, 1}, {1, 4}}, + TestParams{{1, 2, 3}, {1}, {1}, 3, {1, 2, 1}, {2, 5}}, + TestParams{{1, 2, 3}, {1}, {2}, -1, {1, 2, 1}, {3, 6}}, + TestParams{{1, 2, 3}, {3}, {2, 0, 1}, 3, {1, 2, 3}, {3, 1, 2, 6, 4, 5}}, + // Higher rank indices (output is rank(params) + rank(indices) - 1). + TestParams{{1, 2, 3}, {1, 1}, {0}, 2, {1, 1, 1, 3}, {1, 2, 3}}, + }; + + // Ok. + for (int i = 0; i < kGatherOKCases; i++) { + test->Reset(); + test->AddTestTensor("params", ok_params[i].params_dims, 1, + TfDataTypeToTrt(dtype)); + test->AddTestTensor("indices", ok_params[i].indices_dims, 1, + nvinfer1::DataType::kINT32); + test->AddTestWeights("axis", {1}, {ok_params[i].axis}); + test->RunValidationAndConversion(node_def); + TRT_TensorOrWeights output; + TF_EXPECT_OK(test->GetTensorOrWeights("my_gather", &output)); + EXPECT_TRUE(output.is_tensor()); + ExpectTrtDimsEqualsArray(ok_params[i].expected_output_dims, + output.tensor()->getDimensions()); + + // Create input in CType and convert expected output to CType. + std::vector inputs = {CType(1), CType(2), CType(3), + CType(4), CType(5), CType(6)}; + std::vector converted_expected_output( + ok_params[i].expected_output.begin(), + ok_params[i].expected_output.end()); + + const DataVec input_data{ + {"params", test::AsTensor(inputs)}, + {"indices", test::AsTensor(ok_params[i].indices)}}; + DataVec output_data{ + {"my_gather", + ConstructTensor(ok_params[i].expected_output.size())}}; + test->BuildAndRun(input_data, &output_data); + EXPECT_THAT(GetSpanForData(output_data[0]), + ElementsAreArray(converted_expected_output)); + } +} + +TEST_F(OpConverterTest, ConvertGather) { + { + // Input list is empty, should fail. + NodeDef node_def = MakeNodeDef("my_gather", "GatherV2", {}); + RunValidationAndConversion( + node_def, error::INVALID_ARGUMENT, + "GatherV2 got 0 inputs but expected 3, at my_gather"); + } + + // Get the NodeDef for GatherV2. 
+ Scope s = Scope::NewRootScope(); + auto params = ops::Placeholder(s.WithOpName("params"), DT_FLOAT); + auto indices = ops::Placeholder(s.WithOpName("indices"), DT_INT32); + auto axis = ops::Placeholder(s.WithOpName("axis"), DT_INT32); + auto gather = ops::GatherV2(s.WithOpName("my_gather"), params, indices, axis); + const NodeDef& node_def = gather.operation.node()->def(); + { + // Axis is a tensor, should fail. + Reset(); + AddTestTensor("params", {1, 2, 3}); + AddTestTensor("indices", {2}); + AddTestTensor("axis", {1}); + RunValidationAndConversion( + node_def, error::UNIMPLEMENTED, + "The input \"axis\" for GatherV2 must be a constant, at my_gather"); + } + { + // Axis is out of bounds, should fail. + Reset(); + AddTestTensor("params", {1, 2, 3}); + AddTestTensor("indices", {2}); + AddTestWeights("axis", {1}, {4}); + RunValidationAndConversion(node_def, error::INVALID_ARGUMENT, + "Axis value of 4 is out of bounds, must be in " + "range [-4, 4), at my_gather"); + } + { + // Axis is batch dimension, should fail. + Reset(); + AddTestTensor("params", {1, 2, 3}); + AddTestTensor("indices", {2}); + AddTestWeights("axis", {1}, {0}); + RunValidationAndConversion(node_def, error::UNIMPLEMENTED, + "TensorRT does not allow manipulation of the " + "batch dimension, at my_gather"); + } + + Reset(); + TestConvertGather(this); + TestConvertGather(this); + TestConvertGather(this); +} + } // namespace convert } // namespace tensorrt } // namespace tensorflow -- GitLab From 59799045033e6357cb9daf6861baf7b005746400 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Wed, 13 Feb 2019 22:41:38 -0800 Subject: [PATCH 123/351] Simplify TensorShape v1/v2 switch (and fix bugs in it). Note that this means that whenever v2 behavior is enabled, all functions (including compat.v1 functions) will return v2 TensorShapes. TensorShape behavior is switched as a global, and not per function. 
PiperOrigin-RevId: 233894962 --- .../python/data/experimental/ops/batching.py | 6 ++-- tensorflow/python/framework/tensor_shape.py | 28 ++++--------------- tensorflow/python/ops/nn_ops.py | 8 +++--- .../saved_model/nested_structure_coder.py | 5 +--- .../golden/v1/tensorflow.-tensor-shape.pbtxt | 2 +- .../golden/v2/tensorflow.-tensor-shape.pbtxt | 3 +- 6 files changed, 15 insertions(+), 37 deletions(-) diff --git a/tensorflow/python/data/experimental/ops/batching.py b/tensorflow/python/data/experimental/ops/batching.py index 9c79af2464..983f7640b8 100644 --- a/tensorflow/python/data/experimental/ops/batching.py +++ b/tensorflow/python/data/experimental/ops/batching.py @@ -718,15 +718,15 @@ class _RebatchDataset(dataset_ops.UnaryDataset): """Recalculates the output_shapes after dividing it by num_workers.""" if len(output_shapes) < 1: raise ValueError("Input shape should have at least one dimension.") - if (output_shapes.dims[0].value and - output_shapes.dims[0].value % num_workers != 0): + if (tensor_shape.dimension_value(output_shapes[0]) and + tensor_shape.dimension_value(output_shapes[0]) % num_workers != 0): raise errors.InvalidArgumentError( None, None, "First dim of input shape: %d is not divisible by num_workers: %d" % (output_shapes[0], num_workers)) output_dims = [d for d in output_shapes.dims] output_dims[0] = output_dims[0] // num_workers - return tensor_shape.TensorShapeV1(output_dims) + return tensor_shape.TensorShape(output_dims) output_shapes = nest.map_structure(recalculate_output_shapes, input_dataset.output_shapes) diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py index a7537bb5f1..40fccc86a3 100644 --- a/tensorflow/python/framework/tensor_shape.py +++ b/tensorflow/python/framework/tensor_shape.py @@ -74,9 +74,8 @@ def enable_v2_tensorshape(): # in `tensor_shape[i]`, but they would not be. 
``` """ - global _TENSORSHAPE_V2_OVERRIDE, TensorShape # pylint: disable=invalid-name + global _TENSORSHAPE_V2_OVERRIDE # pylint: disable=invalid-name _TENSORSHAPE_V2_OVERRIDE = True - TensorShape = TensorShapeV2 @tf_export(v1=["disable_v2_tensorshape"]) @@ -85,9 +84,8 @@ def disable_v2_tensorshape(): See docstring for `enable_v2_tensorshape` for details about the new behavior. """ - global _TENSORSHAPE_V2_OVERRIDE, TensorShape # pylint: disable=invalid-name + global _TENSORSHAPE_V2_OVERRIDE # pylint: disable=invalid-name _TENSORSHAPE_V2_OVERRIDE = False - TensorShape = TensorShapeV1 @tf_export("compat.dimension_value", @@ -635,8 +633,8 @@ def as_dimension(value): return Dimension(value) -@tf_export(v1=["TensorShape"]) -class TensorShapeV1(object): +@tf_export("TensorShape") +class TensorShape(object): """Represents the shape of a `Tensor`. A `TensorShape` represents a possibly-partial shape specification for a @@ -695,7 +693,7 @@ class TensorShapeV1(object): @property def _v2_behavior(self): if _TENSORSHAPE_V2_OVERRIDE is None: - return False + return tf2.enabled() return _TENSORSHAPE_V2_OVERRIDE def __repr__(self): @@ -1151,22 +1149,6 @@ def unknown_shape(rank=None, **kwargs): return TensorShape([Dimension(None)] * rank) -@tf_export("TensorShape", v1=[]) -class TensorShapeV2(TensorShapeV1): - - @property - def _v2_behavior(self): - if _TENSORSHAPE_V2_OVERRIDE is None: - return True - return _TENSORSHAPE_V2_OVERRIDE - - -if tf2.enabled(): - TensorShape = TensorShapeV2 -else: - TensorShape = TensorShapeV1 - - def scalar(): """Returns a shape representing a scalar.""" return TensorShape([]) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index a7aa97c2e4..cf4aa51b6e 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -928,16 +928,16 @@ def convolution_internal( name=None): """Internal function which performs rank agnostic convolution.""" with ops.name_scope(name, "convolution", [input, filters]) as 
name: - if isinstance(input.shape, tensor_shape.TensorShapeV1) and \ + if isinstance(input.shape, tensor_shape.TensorShape) and \ input.shape.rank is not None: n = len(input.shape) - 2 - elif not isinstance(input.shape, tensor_shape.TensorShapeV1) and \ + elif not isinstance(input.shape, tensor_shape.TensorShape) and \ input.shape is not None: n = len(input.shape) - 2 - elif isinstance(filters.shape, tensor_shape.TensorShapeV1) and \ + elif isinstance(filters.shape, tensor_shape.TensorShape) and \ filters.shape.rank is not None: n = len(filters.shape) - 2 - elif not isinstance(filters.shape, tensor_shape.TensorShapeV1) and \ + elif not isinstance(filters.shape, tensor_shape.TensorShape) and \ filters.shape is not None: n = len(filters.shape) - 2 else: diff --git a/tensorflow/python/saved_model/nested_structure_coder.py b/tensorflow/python/saved_model/nested_structure_coder.py index 5cf9a5b155..3d335de555 100644 --- a/tensorflow/python/saved_model/nested_structure_coder.py +++ b/tensorflow/python/saved_model/nested_structure_coder.py @@ -361,10 +361,7 @@ class _TensorShapeCodec(object): """Codec for `TensorShape`.""" def can_encode(self, pyobj): - return isinstance(pyobj, (tensor_shape.TensorShape, - # TODO(b/121255889): Should not need these. 
- tensor_shape.TensorShapeV1, - tensor_shape.TensorShapeV2)) + return isinstance(pyobj, tensor_shape.TensorShape) def do_encode(self, tensor_shape_value, encode_fn): del encode_fn diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-tensor-shape.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-tensor-shape.pbtxt index d11e927bd5..60518ffadc 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-tensor-shape.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-tensor-shape.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.TensorShape" tf_class { - is_instance: "" + is_instance: "" is_instance: "" member { name: "dims" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-tensor-shape.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-tensor-shape.pbtxt index bee19520b7..60518ffadc 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-tensor-shape.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-tensor-shape.pbtxt @@ -1,7 +1,6 @@ path: "tensorflow.TensorShape" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" member { name: "dims" -- GitLab From 4dbec11094abacbcc62eec54e12d8b6a965ae64b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 00:22:13 -0800 Subject: [PATCH 124/351] [TF:XLA] Add image_ops tests to run on XLA Approximately 10 tests were disabled on XLA due to some image ops not supporting align_corners=False in the TF2XLA bridge. 
PiperOrigin-RevId: 233903589 --- tensorflow/python/BUILD | 2 ++ tensorflow/python/keras/BUILD | 1 + tensorflow/python/keras/layers/convolutional_test.py | 2 ++ tensorflow/python/ops/image_grad_test.py | 2 ++ tensorflow/python/ops/image_ops_test.py | 4 ++++ 5 files changed, 11 insertions(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index bd31614a92..94b11feb82 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3543,6 +3543,7 @@ cuda_py_test( ":image_ops", "//third_party/py/numpy", ], + xla_enable_strict_auto_jit = True, ) cuda_py_test( @@ -3568,6 +3569,7 @@ cuda_py_test( ], data = ["//tensorflow/core:image_testdata"], shard_count = 5, + xla_enable_strict_auto_jit = True, ) cuda_py_test( diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 4f710ff658..1ff5ced615 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -582,6 +582,7 @@ cuda_py_test( "//tensorflow/python:client_testlib", ], shard_count = 8, + xla_enable_strict_auto_jit = True, ) cuda_py_test( diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py index 9140ce426e..24b61feec2 100644 --- a/tensorflow/python/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/layers/convolutional_test.py @@ -23,6 +23,7 @@ import numpy as np from tensorflow.python import keras from tensorflow.python.eager import context +from tensorflow.python.framework import test_util from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils from tensorflow.python.platform import test @@ -420,6 +421,7 @@ class ZeroPaddingTest(keras_parameterized.TestCase): keras.layers.ZeroPadding3D(padding=None) +@test_util.disable_all_xla('b/124289666') # align_corners=False unimplemented @keras_parameterized.run_all_keras_modes class UpSamplingTest(keras_parameterized.TestCase): diff --git a/tensorflow/python/ops/image_grad_test.py 
b/tensorflow/python/ops/image_grad_test.py index c481266dd7..f363f1b24a 100644 --- a/tensorflow/python/ops/image_grad_test.py +++ b/tensorflow/python/ops/image_grad_test.py @@ -28,6 +28,7 @@ from tensorflow.python.ops import image_ops from tensorflow.python.platform import test +@test_util.disable_all_xla("b/124289666") # align_corners=False unimplemented class ResizeNearestNeighborOpTest(test.TestCase): TYPES = [np.float32, np.float64] @@ -149,6 +150,7 @@ class ResizeBilinearOpTest(test.TestCase): self.assertLess(err, 1e-3) @test_util.run_deprecated_v1 + @test_util.disable_xla("b/124290659") # align_corners=False unimplemented def testCompareGpuVsCpu(self): in_shape = [2, 4, 6, 3] out_shape = [2, 8, 16, 3] diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index b032d6454d..75fa73e1e4 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -2399,6 +2399,7 @@ class ResizeImagesTest(test_util.TensorFlowTestCase): self.assertAllEqual(img_shape, newshape) self.assertAllClose(resized, img_np, atol=1e-5) + @test_util.disable_xla("b/124289666") # align_corners=False unimplemented def testResizeDown(self): # This test is also conducted with int8, so 127 is the maximum # value that can be used. 
@@ -2430,6 +2431,7 @@ class ResizeImagesTest(test_util.TensorFlowTestCase): resized = self.evaluate(y) self.assertAllClose(resized, expected, atol=1e-5) + @test_util.disable_xla("b/124289666") # align_corners=False unimplemented def testResizeUpAlignCornersFalse(self): img_shape = [1, 3, 2, 1] data = [64, 32, 32, 64, 50, 100] @@ -2467,6 +2469,7 @@ class ResizeImagesTest(test_util.TensorFlowTestCase): [1, target_height, target_width, 1]) self.assertAllClose(resized, expected, atol=1e-05) + @test_util.disable_xla("b/124291162") # Incorrect literal type def testResizeUpAlignCornersTrue(self): img_shape = [1, 3, 2, 1] data = [6, 3, 3, 6, 6, 9] @@ -2553,6 +2556,7 @@ class ResizeImagesTest(test_util.TensorFlowTestCase): resized = self.evaluate(y) self.assertAllClose(resized, expected, atol=1) + @test_util.disable_xla("b/124289666") # align_corners=False unimplemented def testCompareNearestNeighbor(self): if test.is_gpu_available(): input_shape = [1, 5, 6, 3] -- GitLab From 58dcf697b6b3c9d6dab6bb4ec71dec3b33dc6611 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 14 Feb 2019 01:02:31 -0800 Subject: [PATCH 125/351] compat: Update forward compatibility horizon to 2019-02-14 PiperOrigin-RevId: 233907852 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 09d4920e74..1a98e0725d 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 2, 13) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 2, 14) @tf_export("compat.forward_compatible") -- GitLab From 880cc8c1366dd27b5c7e44c7d5b4ab5270ebd1cb Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Thu, 14 Feb 2019 01:22:46 -0800 Subject: [PATCH 126/351] Add an HLO Pass to make Sort stable. Also add a new is_stable boolean to the Sort HLO. The pass either adds a Iota operand to the sort op or reuses such an operand if there is one that can be used. Then the comparison computation is modified to add a tie breaker in case the comparison determines that the relevant values used in the comparison computation are equal. 
PiperOrigin-RevId: 233911034 --- tensorflow/compiler/xla/client/lib/sorting.cc | 3 +- .../compiler/xla/client/lib/sorting_test.cc | 4 +- tensorflow/compiler/xla/client/xla_builder.cc | 11 +- tensorflow/compiler/xla/client/xla_builder.h | 10 +- tensorflow/compiler/xla/service/BUILD | 31 ++ .../xla/service/algebraic_simplifier_test.cc | 8 +- .../service/bfloat16_normalization_test.cc | 6 +- .../compiler/xla/service/cpu/ir_emitter.cc | 6 +- .../xla/service/cpu/runtime_key_value_sort.cc | 41 +- .../xla/service/cpu/runtime_key_value_sort.h | 15 +- tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../xla/service/gpu/nvptx_compiler.cc | 3 + tensorflow/compiler/xla/service/hlo.proto | 5 +- .../xla/service/hlo_creation_utils.cc | 4 +- .../compiler/xla/service/hlo_creation_utils.h | 4 +- .../xla/service/hlo_dataflow_analysis_test.cc | 6 +- .../compiler/xla/service/hlo_instruction.cc | 7 +- .../compiler/xla/service/hlo_instruction.h | 5 +- .../compiler/xla/service/hlo_instructions.cc | 22 +- .../compiler/xla/service/hlo_instructions.h | 4 +- tensorflow/compiler/xla/service/hlo_parser.cc | 7 +- .../compiler/xla/service/hlo_parser_test.cc | 18 + .../compiler/xla/service/op_expander_pass.cc | 3 + .../compiler/xla/service/op_expander_pass.h | 4 +- .../xla/service/stable_sort_expander.cc | 204 ++++++++++ .../xla/service/stable_sort_expander.h | 42 ++ .../xla/service/stable_sort_expander_test.cc | 358 ++++++++++++++++++ .../service/tuple_points_to_analysis_test.cc | 6 +- 28 files changed, 769 insertions(+), 69 deletions(-) create mode 100644 tensorflow/compiler/xla/service/stable_sort_expander.cc create mode 100644 tensorflow/compiler/xla/service/stable_sort_expander.h create mode 100644 tensorflow/compiler/xla/service/stable_sort_expander_test.cc diff --git a/tensorflow/compiler/xla/client/lib/sorting.cc b/tensorflow/compiler/xla/client/lib/sorting.cc index 3245f46e6f..ddc39f4d87 100644 --- a/tensorflow/compiler/xla/client/lib/sorting.cc +++ 
b/tensorflow/compiler/xla/client/lib/sorting.cc @@ -36,7 +36,8 @@ XlaOp TopK(XlaOp input, int64 k) { XlaOp sort_result = Sort({Neg(input), iota_s32}, CreateScalarLtComputation({input_shape.element_type(), S32}, - iota_s32.builder())); + iota_s32.builder()), + last_dim, /*is_stable=*/true); std::vector start_indices(input_shape.dimensions_size(), 0); std::vector limit_indices(input_dims.begin(), input_dims.end()); limit_indices[last_dim] = k; diff --git a/tensorflow/compiler/xla/client/lib/sorting_test.cc b/tensorflow/compiler/xla/client/lib/sorting_test.cc index ae78910a5b..0fbd138aca 100644 --- a/tensorflow/compiler/xla/client/lib/sorting_test.cc +++ b/tensorflow/compiler/xla/client/lib/sorting_test.cc @@ -81,9 +81,7 @@ XLA_TEST_F(SortingTest, TopKFullSort) { ComputeAndCompareR1(&builder, inputs, {}); } -// TODO(b/122298745): Enable this test when the GPU backend supports stable -// sorting. -XLA_TEST_F(SortingTest, DISABLED_ON_GPU(TopKFullSortWithDuplicates)) { +XLA_TEST_F(SortingTest, TopKFullSortWithDuplicates) { XlaBuilder builder(TestName()); XlaOp a; auto a_data = CreateR1Parameter({1, 1, 2, 2, 1}, 0, "a", &builder, &a); diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index fb9dbe851e..b371b5af37 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -1663,14 +1663,16 @@ XlaOp XlaBuilder::Sort(const XlaOp& keys, absl::Span values, Lt(first_lhs_param, first_rhs_param); TF_ASSIGN_OR_RETURN(auto comparator, b->Build()); - return Sort(operands, comparator, dimension); + return Sort(operands, comparator, dimension, /*is_stable=*/false); }); } XlaOp XlaBuilder::Sort(absl::Span operands, - const XlaComputation& comparator, int64 dimension) { + const XlaComputation& comparator, int64 dimension, + bool is_stable) { return ReportErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; + instr.set_is_stable(is_stable); std::vector 
operand_shape_ptrs; TF_ASSIGN_OR_RETURN(std::vector operand_shapes, GetOperandShapes(operands)); @@ -3320,8 +3322,9 @@ XlaOp Sort(const XlaOp& keys, absl::Span values, int64 dimension) { } XlaOp Sort(absl::Span operands, const XlaComputation& comparator, - int64 dimension) { - return operands[0].builder()->Sort(operands, comparator, dimension); + int64 dimension, bool is_stable) { + return operands[0].builder()->Sort(operands, comparator, dimension, + is_stable); } XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max) { diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 1e39c8766f..fd2e9816e8 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -505,7 +505,7 @@ class XlaBuilder { XlaOp Sort(const XlaOp& keys, absl::Span values = {}, int64 dimension = -1); XlaOp Sort(absl::Span operands, const XlaComputation& comparator, - int64 dimension = -1); + int64 dimension = -1, bool is_stable = false); XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max); @@ -923,7 +923,8 @@ class XlaBuilder { friend XlaOp Sort(const XlaOp& keys, absl::Span values, int64 dimension); friend XlaOp Sort(absl::Span operands, - const XlaComputation& comparator, int64 dimension); + const XlaComputation& comparator, int64 dimension, + bool is_stable); friend XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max); friend XlaOp Map(XlaBuilder* builder, absl::Span operands, const XlaComputation& computation, @@ -1695,7 +1696,8 @@ XlaOp Sort(const XlaOp& keys, absl::Span values = {}, int64 dimension = -1); // Enqueues a sort instruction onto the computation, using 'comparator' for -// comparisons. 'comparator' needs to define a strict weak order. +// comparisons. 'comparator' needs to define a strict weak order. 'is_stable' +// determines whether the stable sorting should be used. 
// If only one operand is provided: // * If the operand is a rank-1 tensor (an array), the result is a sorted array. // The resulting sorting order has the property that for all index positions @@ -1718,7 +1720,7 @@ XlaOp Sort(const XlaOp& keys, absl::Span values = {}, // correspond to the value of operand i at two index positions. // Default comparator computations can be found in lib/comparators.h XlaOp Sort(absl::Span operands, const XlaComputation& comparator, - int64 dimension = -1); + int64 dimension = -1, bool is_stable = false); // Enqueues a clamp instruction onto the computation. XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max); diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index a5eae6d396..33ac51ca4b 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -3529,6 +3529,37 @@ tf_cc_test( ], ) +cc_library( + name = "stable_sort_expander", + srcs = ["stable_sort_expander.cc"], + hdrs = ["stable_sort_expander.h"], + deps = [ + ":hlo", + ":hlo_casting_utils", + ":hlo_pass", + ":op_expander_pass", + "//tensorflow/compiler/xla:statusor", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + ], +) + +tf_cc_test( + name = "stable_sort_expander_test", + srcs = ["stable_sort_expander_test.cc"], + deps = [ + ":algebraic_simplifier", + ":hlo_matchers", + ":hlo_parser", + ":pattern_matcher", + ":pattern_matcher_gmock", + ":stable_sort_expander", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/core:test", + ], +) + cc_library( name = "tuple_util", srcs = ["tuple_util.cc"], diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 7743979e3f..d959fafc0c 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ 
b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2753,8 +2753,9 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopSort) { Shape keys_shape = ShapeUtil::MakeShape(F32, {1}); auto keys = builder.AddInstruction( HloInstruction::CreateParameter(0, keys_shape, "keys")); - TF_ASSERT_OK( - MakeSortHlo(keys_shape, {keys}, 0, &builder, module.get()).status()); + TF_ASSERT_OK(MakeSortHlo(keys_shape, {keys}, 0, /*is_stable=*/false, &builder, + module.get()) + .status()); HloComputation* computation = module->AddEntryComputation(builder.Build()); AlgebraicSimplifier simplifier(default_options_); ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); @@ -2775,7 +2776,8 @@ TEST_F(AlgebraicSimplifierTest, ReplaceEffectiveScalarKeyValueSortWithTuple) { HloInstruction::CreateParameter(2, values_shape, "values1")); TF_ASSERT_OK(MakeSortHlo(ShapeUtil::MakeTupleShape( {keys_shape, values_shape, values_shape}), - {keys, values0, values1}, 0, &builder, module.get()) + {keys, values0, values1}, 0, /*is_stable=*/false, + &builder, module.get()) .status()); HloComputation* computation = module->AddEntryComputation(builder.Build()); AlgebraicSimplifier simplifier(default_options_); diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc index 2591ff602c..2caa979745 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc @@ -286,7 +286,8 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSort) { TF_ASSERT_OK_AND_ASSIGN( auto* sort, MakeSortHlo(ShapeUtil::MakeTupleShape({bf16_shape, s32_shape}), - {key, value}, 0, &builder, module.get())); + {key, value}, 0, /*is_stable=*/false, &builder, + module.get())); HloInstruction* gte = builder.AddInstruction( HloInstruction::CreateGetTupleElement(bf16_shape, sort, 0)); @@ -314,7 +315,8 @@ TEST_F(BFloat16NormalizationTest, 
ResolveMixedPrecisionTupleSortRoot) { TF_ASSERT_OK_AND_ASSIGN( auto* sort, MakeSortHlo(ShapeUtil::MakeTupleShape({bf16_shape, f32_shape}), - {key, value}, 0, &builder, module.get())); + {key, value}, 0, /*is_stable=*/false, &builder, + module.get())); auto computation = module->AddEntryComputation(builder.Build()); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 5abb3eb387..9967cf28ee 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -583,7 +583,7 @@ Status IrEmitter::HandleSort(HloInstruction* hlo) { b_.getVoidTy(), {b_.getInt64Ty(), b_.getInt64Ty(), b_.getInt64Ty(), b_.getInt8PtrTy()->getPointerTo(), b_.getInt32Ty(), - b_.getInt32Ty()->getPointerTo(), b_.getInt8PtrTy(), + b_.getInt32Ty()->getPointerTo(), b_.getInt1Ty(), b_.getInt8PtrTy(), b_.getInt64Ty()->getPointerTo(), less_than_function->getType()}, /*isVarArg=*/false); auto* key_value_sort_func = llvm::dyn_cast( @@ -616,8 +616,8 @@ Status IrEmitter::HandleSort(HloInstruction* hlo) { {b_.getInt64(higher_dimensions), b_.getInt64(sort_dimension_elements), b_.getInt64(lower_dimensions), values, b_.getInt32(sort->operand_count()), sizes, - GetExecutableRunOptionsArgument(), GetProfileCountersArgument(), - less_than_function}); + b_.getInt1(sort->is_stable()), GetExecutableRunOptionsArgument(), + GetProfileCountersArgument(), less_than_function}); if (sort->values_count() > 0) { llvm_ir::EmitTuple(GetIrArrayFor(sort), destination_addresses, &b_, diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc index cb46674138..70a6d0af02 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.cc @@ -32,8 +32,8 @@ using tensorflow::int64; TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort( int64 a, 
int64 b, int64 c, char** values, int32 values_count, - int32* values_primitive_type_size_in_bytes, char* run_options, - int64* prof_counters, + int32* values_primitive_type_size_in_bytes, bool is_stable, + char* run_options, int64* prof_counters, void (*less_than)(char*, char*, char**, char**, tensorflow::int64*)) { // 'values' and 'values_primitive_type_size_in_bytes' are managed by the JIT // code, so msan can't tell they are initialized. @@ -69,22 +69,27 @@ TF_ATTRIBUTE_NO_SANITIZE_MEMORY void __xla_cpu_runtime_KeyValueSort( int64 base_offset = index % sort_dimension_offset + (index - index % sort_dimension_offset) * sort_dimension_elements; - std::stable_sort( - indices.get(), indices.get() + sort_dimension_elements, - [&](int64 a, int64 b) -> bool { - int64 memory_index_lhs = (base_offset + a * sort_dimension_offset) * - values_primitive_type_size_in_bytes[0]; - int64 memory_index_rhs = (base_offset + b * sort_dimension_offset) * - values_primitive_type_size_in_bytes[0]; - for (int32 i = 0; i < values_count; ++i) { - comparison_values[i * 2] = values[i] + memory_index_lhs; - comparison_values[i * 2 + 1] = values[i] + memory_index_rhs; - } - char result = 0; // Overwritten by less_than. - less_than(&result, run_options, comparison_values.get(), nullptr, - prof_counters); - return result != 0u; - }); + auto compare_function = [&](int64 a, int64 b) -> bool { + int64 memory_index_lhs = (base_offset + a * sort_dimension_offset) * + values_primitive_type_size_in_bytes[0]; + int64 memory_index_rhs = (base_offset + b * sort_dimension_offset) * + values_primitive_type_size_in_bytes[0]; + for (int32 i = 0; i < values_count; ++i) { + comparison_values[i * 2] = values[i] + memory_index_lhs; + comparison_values[i * 2 + 1] = values[i] + memory_index_rhs; + } + char result = 0; // Overwritten by less_than. 
+ less_than(&result, run_options, comparison_values.get(), nullptr, + prof_counters); + return result != 0u; + }; + if (is_stable) { + std::stable_sort(indices.get(), indices.get() + sort_dimension_elements, + compare_function); + } else { + std::sort(indices.get(), indices.get() + sort_dimension_elements, + compare_function); + } // Reorder the values according to the order defined by 'indices'. for (int32 idx = 0; idx < values_count; ++idx) { diff --git a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h index 4813de9ee6..50c2911c3b 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h @@ -22,15 +22,14 @@ limitations under the License. extern "C" { // Each entry in 'values' represents a 3-dimensional shape with dimensions -// [a, b, c]. The 'b' dimension of the first shape is sorted into ascending -// order according to the results of comparisons using the provided 'less_than' +// [a, b, c]. The 'b' dimension of each shape is sorted into ascending order +// according to the results of comparisons using the provided 'less_than' // function. 'values_count' must be > 0 and specifies the number of entries in // 'values' and 'values_primitive_type_size_in_bytes'. The size of the primitive // type of the i-th shape has exactly 'values_primitive_type_size_in_bytes[i]' -// bytes. The elements in each 'values' shape are reordered in the same way -// according to the comparisons using the first shape. 'run_options' and -// 'prof_counters' are passed through to the less-than function, which expects -// the following arguments: +// bytes. 'is_stable' specifies whether the sorting should be stable. 
+// 'run_options' and 'prof_counters' are passed through to the less-than +// function, which expects the following arguments: // - pointer to the return value buffer (char*) // - xla::ExecutableRunOptions = 'run_options' (char*) // - pointers to the parameter buffers (char**) @@ -39,8 +38,8 @@ extern "C" { extern void __xla_cpu_runtime_KeyValueSort( tensorflow::int64 a, tensorflow::int64 b, tensorflow::int64 c, char** values, tensorflow::int32 values_count, - tensorflow::int32* values_primitive_type_size_in_bytes, char* run_options, - tensorflow::int64* prof_counters, + tensorflow::int32* values_primitive_type_size_in_bytes, bool is_stable, + char* run_options, tensorflow::int64* prof_counters, void (*less_than)(char*, char*, char**, char**, tensorflow::int64*)); } diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 05980fe549..25c4f70d89 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -765,6 +765,7 @@ cc_library( "//tensorflow/compiler/xla/service:reduce_precision_insertion", "//tensorflow/compiler/xla/service:reshape_mover", "//tensorflow/compiler/xla/service:sort_simplifier", + "//tensorflow/compiler/xla/service:stable_sort_expander", "//tensorflow/compiler/xla/service:transpose_folding", "//tensorflow/compiler/xla/service:tuple_simplifier", "//tensorflow/compiler/xla/service:while_loop_constant_sinking", diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index 9c8a181604..6e00e4b4ff 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -82,6 +82,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/reduce_precision_insertion.h" #include "tensorflow/compiler/xla/service/reshape_mover.h" #include "tensorflow/compiler/xla/service/sort_simplifier.h" +#include "tensorflow/compiler/xla/service/stable_sort_expander.h" #include "tensorflow/compiler/xla/service/transpose_folding.h" #include "tensorflow/compiler/xla/service/tuple_simplifier.h" #include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h" @@ -195,6 +196,8 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, pipeline.AddPass( cost_model, /*convert_batch_groups_only=*/true); + // Expand the sort op to support stable sorting if required. + pipeline.AddPass(); // Convert BF16 operations to F32 operations so that the GPU backend can // support BF16 operations without directly implementing a BF16 lowering for // most ops. diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 6e64549e7e..d2c995d87a 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto"; option cc_enable_arenas = true; // Serialization of HloInstruction. -// Next ID: 60 +// Next ID: 61 message HloInstructionProto { reserved 10; reserved "parameter_name"; @@ -175,6 +175,9 @@ message HloInstructionProto { // partners. bool is_host_transfer = 47; + // Whether this Sort instruction should be stable. + bool is_stable = 60; + xla.ScatterDimensionNumbers scatter_dimension_numbers = 48; // Precision configuration for the instruction. Has backend-specific meaning. 
diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index 070115604b..b5d9e8e7f1 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -275,7 +275,7 @@ StatusOr MakeSelectHlo(HloInstruction* pred, StatusOr MakeSortHlo( const Shape& sort_shape, absl::Span operands, - int64 dimension_to_sort, HloComputation::Builder* builder, + int64 dimension_to_sort, bool is_stable, HloComputation::Builder* builder, HloModule* module) { CHECK(!operands.empty()) << "Sort Hlo requires at least one operand."; HloComputation* compare_computation; @@ -293,7 +293,7 @@ StatusOr MakeSortHlo( compare_computation = module->DeepCloneComputation(new_module->entry_computation(), &context); return builder->AddInstruction(HloInstruction::CreateSort( - sort_shape, dimension_to_sort, operands, compare_computation)); + sort_shape, dimension_to_sort, operands, compare_computation, is_stable)); } StatusOr CollapseFirstNDims(HloInstruction* operand, int64 n) { diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.h b/tensorflow/compiler/xla/service/hlo_creation_utils.h index 36b8cdc7fe..17b7a2da6a 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.h +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.h @@ -126,10 +126,10 @@ StatusOr MakeSelectHlo(HloInstruction* pred, // Creates a Sort HLO instruction and adds it to the computation containing the // operands. All operands must be in the same computation. Also creates a // default compare sub-computation which sorts the first operand into ascending -// order. +// order. 'is_stable' specifies whether the sorting should be stable. 
StatusOr MakeSortHlo( const Shape& sort_shape, absl::Span operands, - int64 dimension_to_sort, HloComputation::Builder* builder, + int64 dimension_to_sort, bool is_stable, HloComputation::Builder* builder, HloModule* module); // Creates an R1 Constant HLO instruction of the given PrimitiveType with the diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index e3059e02cf..768e3afb3b 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -2363,7 +2363,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) { auto keys = builder.AddInstruction( HloInstruction::CreateParameter(0, keys_shape, "keys")); TF_ASSERT_OK_AND_ASSIGN( - auto* sort, MakeSortHlo(keys_shape, {keys}, -1, &builder, module_.get())); + auto* sort, MakeSortHlo(keys_shape, {keys}, -1, /*is_stable=*/false, + &builder, module_.get())); computation_ = module_->AddEntryComputation(builder.Build()); RunAnalysis(); @@ -2385,7 +2386,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShareWithTupleUser) { TF_ASSERT_OK_AND_ASSIGN( auto* sort, MakeSortHlo(ShapeUtil::MakeTupleShape({keys_shape, values_shape}), - {keys, values}, 0, &builder, module_.get())); + {keys, values}, 0, /*is_stable=*/false, &builder, + module_.get())); computation_ = module_->AddEntryComputation(builder.Build()); RunAnalysis(); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index aa1f3a2421..8ece90e05c 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -214,7 +214,7 @@ StatusOr> HloInstruction::CreateFromProto( << proto.called_computation_ids_size(); auto sort_operands = all_operands(); instruction = CreateSort(shape, proto.dimensions(0), all_operands(), - computations(0)); + computations(0), proto.is_stable()); 
break; } case HloOpcode::kTranspose: @@ -1170,9 +1170,10 @@ HloInstruction::CreateBroadcastSequence( /* static */ std::unique_ptr HloInstruction::CreateSort( const Shape& shape, int64 dimension, - absl::Span operands, HloComputation* compare) { + absl::Span operands, HloComputation* compare, + bool is_stable) { return absl::make_unique(shape, dimension, operands, - compare); + compare, is_stable); } /* static */ std::unique_ptr HloInstruction::CreateFusion( diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index d7469e1ac9..8470cf7ec5 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -686,10 +686,11 @@ class HloInstruction { // comparisons in the sorting algorithm. 'compare' gets 2 * n parameters, // where parameters 2 * i and 2 * i + 1 are the values of the i-th operand at // specific index positions which should be compared, and should return a - // PRED. + // PRED. 'is_stable' specifies whether stable sorting is required. static std::unique_ptr CreateSort( const Shape& shape, int64 dimension, - absl::Span operands, HloComputation* compare); + absl::Span operands, HloComputation* compare, + bool is_stable); // Creates a while instruction, given a condition computation, a body // computation, and the initial value for the input of the computations. 
For diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 92a74187c5..7c8d98b429 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -659,8 +659,11 @@ std::unique_ptr HloReduceInstruction::CloneWithNewOperandsImpl( HloSortInstruction::HloSortInstruction( const Shape& shape, int64 dimension, - absl::Span operands, HloComputation* compare) - : HloInstruction(HloOpcode::kSort, shape), dimensions_({dimension}) { + absl::Span operands, HloComputation* compare, + bool is_stable) + : HloInstruction(HloOpcode::kSort, shape), + dimensions_({dimension}), + is_stable_(is_stable) { for (auto* value : operands) { AppendOperand(value); } @@ -672,12 +675,18 @@ HloInstructionProto HloSortInstruction::ToProto() const { for (int64 dimension : dimensions_) { proto.add_dimensions(dimension); } + proto.set_is_stable(is_stable()); return proto; } std::vector HloSortInstruction::ExtraAttributesToStringImpl( const HloPrintOptions& options) const { - return {StrCat("dimensions={", StrJoin(dimensions(), ","), "}")}; + std::vector attrs; + attrs.push_back(StrCat("dimensions={", StrJoin(dimensions(), ","), "}")); + if (is_stable()) { + attrs.push_back("is_stable=true"); + } + return attrs; } bool HloSortInstruction::IdenticalSlowPath( @@ -688,14 +697,17 @@ bool HloSortInstruction::IdenticalSlowPath( if (dimensions() != casted_other.dimensions()) { return false; } + if (is_stable() != casted_other.is_stable()) { + return false; + } return eq_computations(to_apply(), other.to_apply()); } std::unique_ptr HloSortInstruction::CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* context) const { - return absl::make_unique(shape, dimensions(0), - new_operands, to_apply()); + return absl::make_unique( + shape, dimensions(0), new_operands, to_apply(), is_stable()); } HloTransposeInstruction::HloTransposeInstruction( diff 
--git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index a0f2b46ba4..8bb37ab435 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -447,7 +447,7 @@ class HloSortInstruction : public HloInstruction { public: explicit HloSortInstruction(const Shape& shape, int64 dimension, absl::Span operands, - HloComputation* compare); + HloComputation* compare, bool is_stable); // Returns the dimension sizes or numbers associated with this instruction. const std::vector& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } @@ -460,6 +460,7 @@ class HloSortInstruction : public HloInstruction { HloInstruction* mutable_keys() { return mutable_operand(0); } // Returns the number of value operands. int64 values_count() const { return operand_count() - 1; } + bool is_stable() const { return is_stable_; } private: std::vector ExtraAttributesToStringImpl( @@ -474,6 +475,7 @@ class HloSortInstruction : public HloInstruction { HloCloneContext* context) const override; std::vector dimensions_; + bool is_stable_; }; class HloTransposeInstruction : public HloInstruction { diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 20dbed07c5..b8e699fee2 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -895,6 +895,8 @@ bool HloParser::ParseInstructionRhs(HloComputation::Builder* builder, optional> dimensions; attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, &dimensions}; + optional is_stable = false; + attrs["is_stable"] = {/*required=*/false, AttrTy::kBool, &is_stable}; optional to_apply; attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, &to_apply}; @@ -902,8 +904,9 @@ bool HloParser::ParseInstructionRhs(HloComputation::Builder* builder, 
dimensions->size() != 1) { return false; } - instruction = builder->AddInstruction(HloInstruction::CreateSort( - shape, dimensions->at(0), operands, to_apply.value())); + instruction = builder->AddInstruction( + HloInstruction::CreateSort(shape, dimensions->at(0), operands, + to_apply.value(), is_stable.value())); break; } case HloOpcode::kTuple: { diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 203a7dba22..4b9453cfd7 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -1145,6 +1145,24 @@ ENTRY Sort { ROOT sorted = (f32[1024,16]{0,1}, s32[1024,16]{0,1}, u32[1024,16]{0,1}, f32[1024,16]{0,1}) sort(keys, values.0, values.1, values.2), dimensions={0}, to_apply=compare } +)" +}, +// Sort (Key) is_stable=true +{ +"SortKeyStable", +R"(HloModule sort + +compare { + p.0.lhs = f32[] parameter(0) + p.0.rhs = f32[] parameter(1) + ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs) +} + +ENTRY Sort { + x = f32[1024]{0} parameter(0) + ROOT sorted = f32[1024]{0} sort(x), dimensions={0}, is_stable=true, to_apply=compare +} + )" }, // Conditional diff --git a/tensorflow/compiler/xla/service/op_expander_pass.cc b/tensorflow/compiler/xla/service/op_expander_pass.cc index 87f0886a97..02c9d4b387 100644 --- a/tensorflow/compiler/xla/service/op_expander_pass.cc +++ b/tensorflow/compiler/xla/service/op_expander_pass.cc @@ -36,6 +36,9 @@ StatusOr OpExpanderPass::Run(HloModule* module) { for (HloInstruction* inst : matching_instructions) { TF_ASSIGN_OR_RETURN(HloInstruction * expanded_root, ExpandInstruction(inst)); + if (expanded_root == nullptr) { + continue; + } TF_RETURN_IF_ERROR(inst->parent()->ReplaceInstruction(inst, expanded_root)); } diff --git a/tensorflow/compiler/xla/service/op_expander_pass.h b/tensorflow/compiler/xla/service/op_expander_pass.h index 794849d354..276e3d70b8 100644 --- a/tensorflow/compiler/xla/service/op_expander_pass.h +++ 
b/tensorflow/compiler/xla/service/op_expander_pass.h @@ -33,7 +33,9 @@ class OpExpanderPass : public HloModulePass { // Returns `true` if `instruction` should be expanded by this pass. virtual bool InstructionMatchesPattern(HloInstruction* instruction) = 0; - // Returns a replacement for `instruction`. + // Returns a replacement for `instruction`, or nullptr if no replacement is + // needed (e.g. only the to_apply subcomputation of the instruction was + // modified). virtual StatusOr ExpandInstruction( HloInstruction* instruction) = 0; }; diff --git a/tensorflow/compiler/xla/service/stable_sort_expander.cc b/tensorflow/compiler/xla/service/stable_sort_expander.cc new file mode 100644 index 0000000000..1aa7e5fe7c --- /dev/null +++ b/tensorflow/compiler/xla/service/stable_sort_expander.cc @@ -0,0 +1,204 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/stable_sort_expander.h" + +#include +#include +#include + +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/op_expander_pass.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace xla { + +// Looks for an iota operand that can be used as tie breaker in the computation. +// If no matching iota operand is found, an iota operand is added to Sort. The +// comparison computation is adjusted to break ties using the values from the +// iota operand. +StatusOr StableSortExpander::ExpandInstruction( + HloInstruction* instruction) { + auto* sort = Cast(instruction); + HloComputation* computation = sort->parent(); + + HloInstruction* expanded_sort = nullptr; + absl::flat_hash_set used_indices; + int64 iota_index = -1; + for (const HloInstruction* operand : sort->operands()) { + // We can only use the iota operand if it has an iota dimension which is the + // same as the dimension to sort. Also it should have an integral type that + // is large enough for the number of elements in the sort dimension. For + // now, we only allow S32, because we expect to find an S32 iota operand for + // all Sort ops which are created by TopK. + // TODO(b/122298745): Also support other types. 
+ if (operand->opcode() == HloOpcode::kIota && + Cast(operand)->iota_dimension() == + sort->sort_dimension() && + operand->shape().element_type() == S32) { + iota_index = sort->operand_index(operand); + break; + } + } + + // If there is currently no iota operand which we could use for making the + // sort stable, we will have to add a new such operand. 
+ if (iota_index == -1) { + Shape iota_shape = sort->operand(0)->shape(); + // We might need to use S64 if the number of elements in the sort dimension + // is bigger than 2^31 - 1. + // TODO(b/122298745): Handle Sort ops where S32 is too small for the number + // of elements in the sort dimension. + if (iota_shape.dimensions(sort->sort_dimension()) > + std::numeric_limits::max()) { + return Unimplemented( + "Stable sorting of more than 2^31-1 elements is not implemented"); + } + iota_shape.set_element_type(S32); + auto iota = computation->AddInstruction( + HloInstruction::CreateIota(iota_shape, sort->sort_dimension())); + + // Create a new comparator. + auto comparator = sort->to_apply(); + absl::flat_hash_map> + replacements; + std::vector> extra_parameters; + std::vector extra_parameter_ptrs; + Shape scalar_shape = ShapeUtil::MakeShape(S32, {}); + extra_parameters.push_back(HloInstruction::CreateParameter( + sort->operand_count() * 2, scalar_shape, + absl::StrCat("p.", sort->operand_count(), ".lhs"))); + extra_parameter_ptrs.push_back(extra_parameters.back().get()); + extra_parameters.push_back(HloInstruction::CreateParameter( + sort->operand_count() * 2 + 1, scalar_shape, + absl::StrCat("p.", sort->operand_count(), ".rhs"))); + extra_parameter_ptrs.push_back(extra_parameters.back().get()); + sort->set_to_apply(sort->GetModule()->AddEmbeddedComputation( + comparator->CloneWithReplacements(std::move(replacements), + extra_parameter_ptrs))); + + // Replace the original sort op. + std::vector new_operands(sort->operands().begin(), + sort->operands().end()); + new_operands.push_back(iota); + std::vector new_shapes = sort->operand_count() == 1 + ? 
std::vector{sort->shape()} + : sort->shape().tuple_shapes(); + new_shapes.push_back(iota_shape); + Shape new_sort_shape = ShapeUtil::MakeTupleShape(new_shapes); + HloInstruction* new_sort = computation->AddInstruction( + sort->CloneWithNewOperands(new_sort_shape, new_operands)); + + // Add a "wrapper" around the new sort op to make sure we have the same + // shape as before. For the rank 1 case, we only need a GetTupleElement, + // otherwise we create a Tuple consisting of GetTupleElements of the new + // sort. + std::vector tuple_elements; + tuple_elements.reserve(sort->operand_count()); + for (int64 i = 0; i < sort->operand_count(); ++i) { + tuple_elements.push_back( + computation->AddInstruction(HloInstruction::CreateGetTupleElement( + sort->operand(i)->shape(), new_sort, i))); + } + expanded_sort = tuple_elements[0]; + if (tuple_elements.size() > 1) { + expanded_sort = computation->AddInstruction( + HloInstruction::CreateTuple(tuple_elements)); + } + sort = Cast(new_sort); + iota_index = sort->operand_count() - 1; + } + + // Modify the computation to break ties using the iota operand. + auto comparator = sort->to_apply(); + std::vector instructions_postorder = + comparator->MakeInstructionPostOrder(); + absl::flat_hash_map replacements; + // Look up instr in the replacements map, and return either the replacement, + // or instr, if the replacement isn't present. + auto replace = [&](HloInstruction* instr) { + auto it = replacements.find(instr); + if (it == replacements.end()) { + return instr; + } + return it->second; + }; + HloInstruction* old_root = comparator->root_instruction(); + // The comparison computation gets 2 * n parameters (n being the number of + // operands of Sort), where parameters 2 * i and 2 * i + 1 correspond to two + // different scalars of operand i of Sort which are to be compared. 
The + // comparison computation should induce a strict weak order, so if + // to_apply(p1.lhs, p1.rhs, ..., pn.lhs, pn.rhs) is equal to + // to_apply(p1.rhs, p1.lhs, ..., pn.rhs, pn.lhs), we can conclude that the + // values to be compared are equivalent, and perform a tie-breaker comparison. + // + // We clone each instruction with at least one operand, but use as new + // operands of the instruction the replacements of the original operands. + // Parameter 2 * i is replaced by parameter 2 * i + 1 and vice versa. This + // should make sure that the cloned root instruction gives the result of the + // comparison computation when being called with each scalar pair reversed. + // This swap also covers the parameters corresponding to the iota operand. + for (int64 i = 0; i < comparator->num_parameters(); ++i) { + replacements[comparator->parameter_instruction(i)] = + comparator->parameter_instruction(i ^ 1); + } + HloInstruction* cloned_root = nullptr; + for (HloInstruction* inst : instructions_postorder) { + if (inst->operand_count() == 0) { + continue; + } + std::vector new_operands; + new_operands.reserve(inst->operand_count()); + for (HloInstruction* operand : inst->operands()) { + new_operands.push_back(replace(operand)); + } + auto new_instruction = + inst->CloneWithNewOperands(inst->shape(), new_operands); + replacements[inst] = new_instruction.get(); + if (inst == old_root) { + cloned_root = new_instruction.get(); + } + comparator->AddInstruction(std::move(new_instruction)); + } + CHECK_NE(cloned_root, nullptr); + Shape scalar_pred = ShapeUtil::MakeShape(PRED, {}); + HloInstruction* same = + comparator->AddInstruction(HloInstruction::CreateBinary( + scalar_pred, HloOpcode::kEq, old_root, cloned_root)); + HloInstruction* tie_breaker = + comparator->AddInstruction(HloInstruction::CreateBinary( + scalar_pred, HloOpcode::kLt, + comparator->parameter_instruction(2 * iota_index), + comparator->parameter_instruction(2 * iota_index + 1))); + HloInstruction* new_root = + 
comparator->AddInstruction(HloInstruction::CreateTernary( + ShapeUtil::MakeShape(PRED, {}), HloOpcode::kSelect, same, tie_breaker, + old_root)); + comparator->set_root_instruction(new_root); + + return expanded_sort; +} + +bool StableSortExpander::InstructionMatchesPattern( + HloInstruction* instruction) { + return instruction->opcode() == HloOpcode::kSort && + Cast(instruction)->is_stable(); +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/stable_sort_expander.h b/tensorflow/compiler/xla/service/stable_sort_expander.h new file mode 100644 index 0000000000..31b6fd92d2 --- /dev/null +++ b/tensorflow/compiler/xla/service/stable_sort_expander.h @@ -0,0 +1,42 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_STABLE_SORT_EXPANDER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_STABLE_SORT_EXPANDER_H_ + +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" +#include "tensorflow/compiler/xla/service/op_expander_pass.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace xla { + +// HLO pass which expands Sort ops that have the is_stable field set to true +// into equivalent Sort ops which guarantee stable sorting without relying on +// the is_stable field. 
+class StableSortExpander : public OpExpanderPass { + public: + absl::string_view name() const override { return "stable-sort-expander"; } + + private: + bool InstructionMatchesPattern(HloInstruction* instruction) override; + StatusOr ExpandInstruction( + HloInstruction* instruction) override; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_STABLE_SORT_EXPANDER_H_ diff --git a/tensorflow/compiler/xla/service/stable_sort_expander_test.cc b/tensorflow/compiler/xla/service/stable_sort_expander_test.cc new file mode 100644 index 0000000000..a62d953e6e --- /dev/null +++ b/tensorflow/compiler/xla/service/stable_sort_expander_test.cc @@ -0,0 +1,358 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/stable_sort_expander.h" + +#include "tensorflow/compiler/xla/service/algebraic_simplifier.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/service/pattern_matcher.h" +#include "tensorflow/compiler/xla/service/pattern_matcher_gmock.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { + +namespace m = match; + +using StableSortExpanderTest = HloTestBase; + +// Checks whether 'a' and 'b' are roots of equivalent computations, except that +// parameters 2 * i and 2 * i + 1 are switched. +bool IsSameComputationExceptParams(const HloInstruction* a, + const HloInstruction* b) { + if (a->opcode() != b->opcode() || a->operand_count() != b->operand_count()) { + return false; + } + if (a->opcode() == HloOpcode::kParameter) { + // Check that parameters were switched. + return a->parameter_number() == (b->parameter_number() ^ 1); + } + // If the operation has no operands, it should actually be the same. + if (a->operand_count() == 0) { + return a == b; + } + // Otherwise recursively compare all operands. + for (int64 i = 0; i < a->operand_count(); ++i) { + if (!IsSameComputationExceptParams(a->operand(i), b->operand(i))) { + return false; + } + } + return true; +} + +// Check that the comparison computation has been modified to add a tie breaker +// using 'iota_parameter'. 
+void CheckComputationHasTieBreaker(const HloInstruction* root, + int64 iota_parameter) { + // With the tie breaker, the root instruction should be + // Select(Eq(Comp(), CompReverse()), Lt(), Comp()) + // with Comp() being the original comparison function, and CompReverse() being + // the copied comparison function where the parameters are reversed. Lt() is + // the tie breaker comparison using the Iota operand. + ASSERT_EQ(root->opcode(), HloOpcode::kSelect); + ASSERT_EQ(root->operand(0)->opcode(), HloOpcode::kEq); + + // Check that the tie breaker instruction is correct. + EXPECT_THAT(root->operand(1), + GmockMatch(m::Lt(m::Parameter(iota_parameter * 2), + m::Parameter(iota_parameter * 2 + 1)))); + EXPECT_EQ(root->operand(2), root->operand(0)->operand(0)); + + // Check that Comp() and CompReverse() are equivalent except that + // CompReverse() has reversed parameters. + EXPECT_TRUE(IsSameComputationExceptParams(root->operand(0)->operand(0), + root->operand(0)->operand(1))); +} + +TEST_F(StableSortExpanderTest, StabilizeSortReuseIotaOperand) { + const char* hlo_string = R"( + HloModule permutation_sort + + compare { + p.0.lhs = f32[] parameter(0) + p.0.rhs = f32[] parameter(1) + p.1.lhs = s32[] parameter(2) + p.1.rhs = s32[] parameter(3) + ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs) + } + + ENTRY sort_computation { + keys = f32[64,8732]{1,0} parameter(0) + values = s32[64,8732]{1,0} iota(), iota_dimension=1 + sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values), + dimensions={1}, to_apply=compare, is_stable=true + ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0 + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + StableSortExpander stabilizer; + EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::GetTupleElement( + m::Sort(m::Parameter(0), m::Iota()), 0))); + CheckComputationHasTieBreaker( + 
root->operand(0)->to_apply()->root_instruction(), /*iota_parameter=*/1); +} + +TEST_F(StableSortExpanderTest, + StabilizeSortReuseIotaOperandComplicatedComparison) { + const char* hlo_string = R"( + HloModule permutation_sort + + compare { + p.0.lhs = f32[] parameter(0) + p.0.rhs = f32[] parameter(1) + p.1.lhs = s32[] parameter(2) + p.1.rhs = s32[] parameter(3) + max = u32[] constant(2147483647) + zero = s32[] constant(0) + lhs.signed = s32[] bitcast-convert(p.0.lhs) + lhs.unsigned = u32[] bitcast-convert(p.0.lhs) + lhs.flipped = u32[] subtract(max, lhs.unsigned) + lhs.flipped.signed = s32[] bitcast-convert(lhs.flipped) + lhs.is_negative = pred[] less-than(lhs.flipped.signed, zero) + lhs.converted = s32[] select(lhs.is_negative, lhs.flipped.signed, lhs.signed) + rhs.signed = s32[] bitcast-convert(p.0.rhs) + rhs.unsigned = u32[] bitcast-convert(p.0.rhs) + rhs.flipped = u32[] subtract(max, rhs.unsigned) + rhs.flipped.signed = s32[] bitcast-convert(rhs.flipped) + rhs.is_negative = pred[] less-than(rhs.flipped.signed, zero) + rhs.converted = s32[] select(rhs.is_negative, rhs.flipped.signed, rhs.signed) + ROOT lt = pred[] less-than(lhs.converted, rhs.converted) + } + + ENTRY sort_computation { + keys = f32[64,8732]{1,0} parameter(0) + values = s32[64,8732]{1,0} iota(), iota_dimension=1 + sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values), + dimensions={1}, to_apply=compare, is_stable=true + ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0 + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + StableSortExpander stabilizer; + EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::GetTupleElement( + m::Sort(m::Parameter(0), m::Iota()), 0))); + CheckComputationHasTieBreaker( + root->operand(0)->to_apply()->root_instruction(), /*iota_parameter=*/1); +} + +TEST_F(StableSortExpanderTest, 
StabilizeSortAddIotaOperandAndChangeRoot) { + const char* hlo_string = R"( + HloModule permutation_sort + + compare { + p.0.lhs = f32[] parameter(0) + p.0.rhs = f32[] parameter(1) + p.1.lhs = s32[] parameter(2) + p.1.rhs = s32[] parameter(3) + ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs) + } + + ENTRY sort_computation { + keys = f32[64,8732]{1,0} parameter(0) + values = s32[64,8732]{1,0} parameter(1) + ROOT sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values), + dimensions={1}, to_apply=compare, is_stable=true + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + StableSortExpander stabilizer; + EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, GmockMatch(m::Tuple( + m::GetTupleElement( + m::Sort(m::Parameter(0), m::Parameter(1), m::Iota()), 0), + m::GetTupleElement( + m::Sort(m::Parameter(0), m::Parameter(1), m::Iota()), 1)))); + CheckComputationHasTieBreaker( + root->operand(0)->operand(0)->to_apply()->root_instruction(), + /*iota_parameter=*/2); +} + +TEST_F(StableSortExpanderTest, HonorIsStableFlag) { + const char* hlo_string = R"( + HloModule permutation_sort + + compare { + p.0.lhs = f32[] parameter(0) + p.0.rhs = f32[] parameter(1) + p.1.lhs = s32[] parameter(2) + p.1.rhs = s32[] parameter(3) + ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs) + } + + ENTRY sort_computation { + keys = f32[64,8732]{1,0} parameter(0) + values = s32[64,8732]{1,0} iota(), iota_dimension=1 + sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values), + dimensions={1}, to_apply=compare, is_stable=false + ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0 + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + StableSortExpander stabilizer; + EXPECT_FALSE(stabilizer.Run(module.get()).ValueOrDie()); +} + +TEST_F(StableSortExpanderTest, + StabilizeSortDontReuseIotaOperandWrongDimension) { + 
const char* hlo_string = R"( + HloModule permutation_sort + + compare { + p.0.lhs = f32[] parameter(0) + p.0.rhs = f32[] parameter(1) + p.1.lhs = s32[] parameter(2) + p.1.rhs = s32[] parameter(3) + ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs) + } + + ENTRY sort_computation { + keys = f32[64,8732]{1,0} parameter(0) + values = s32[64,8732]{1,0} iota(), iota_dimension=0 + sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values), + dimensions={1}, to_apply=compare, is_stable=true + ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0 + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + StableSortExpander stabilizer; + EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie()); + // Simplify away the "wrapper" tuple around the new sort. + AlgebraicSimplifier simplifier(AlgebraicSimplifierOptions( + [](const Shape&, const Shape&) { return false; })); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::GetTupleElement( + m::Sort(m::Parameter(0), m::Iota(), m::Iota()), 0))); + CheckComputationHasTieBreaker( + root->operand(0)->to_apply()->root_instruction(), + /*iota_parameter=*/2); +} + +TEST_F(StableSortExpanderTest, StabilizeSortDontReuseIotaOperandWrongType) { + const char* hlo_string = R"( + HloModule permutation_sort + + compare { + p.0.lhs = f32[] parameter(0) + p.0.rhs = f32[] parameter(1) + p.1.lhs = f32[] parameter(2) + p.1.rhs = f32[] parameter(3) + ROOT lt = pred[] less-than(p.0.lhs, p.0.rhs) + } + + ENTRY sort_computation { + keys = f32[64,8732]{1,0} parameter(0) + values = f32[64,8732]{1,0} iota(), iota_dimension=1 + sort = (f32[64,8732]{1,0}, f32[64,8732]{1,0}) sort(keys, values), + dimensions={1}, to_apply=compare, is_stable=true + ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0 + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + 
StableSortExpander stabilizer; + EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie()); + // Simplify away the "wrapper" tuple around the new sort. + AlgebraicSimplifier simplifier(AlgebraicSimplifierOptions( + [](const Shape&, const Shape&) { return false; })); + ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::GetTupleElement( + m::Sort(m::Parameter(0), m::Iota(), m::Iota()), 0))); + CheckComputationHasTieBreaker( + root->operand(0)->to_apply()->root_instruction(), + /*iota_parameter=*/2); +} + +TEST_F(StableSortExpanderTest, StabilizeSortR1) { + const char* hlo_string = R"( + HloModule permutation_sort + + compare { + p.0.lhs = s32[] parameter(0) + p.0.rhs = s32[] parameter(1) + mask = s32[] constant(65535) + lhs = s32[] and(p.0.lhs, mask) + rhs = s32[] and(p.0.rhs, mask) + ROOT lt = pred[] less-than(lhs, rhs) + } + + ENTRY sort_computation { + keys = s32[64,8732]{1,0} parameter(0) + ROOT sort = s32[64,8732]{1,0} sort(keys), dimensions={0}, to_apply=compare, + is_stable=true + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + StableSortExpander stabilizer; + EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::GetTupleElement( + m::Sort(m::Parameter(0), m::Iota()), 0))); + CheckComputationHasTieBreaker( + root->operand(0)->to_apply()->root_instruction(), /*iota_parameter=*/1); +} + +TEST_F(StableSortExpanderTest, StabilizeSortR1NoRoot) { + const char* hlo_string = R"( + HloModule permutation_sort + + compare { + p.0.lhs = s32[] parameter(0) + p.0.rhs = s32[] parameter(1) + mask = s32[] constant(65535) + lhs = s32[] and(p.0.lhs, mask) + rhs = s32[] and(p.0.rhs, mask) + ROOT lt = pred[] less-than(lhs, rhs) + } + + ENTRY sort_computation { + keys = s32[64,8732]{1,0} parameter(0) + sort = s32[64,8732]{1,0} sort(keys), 
dimensions={0}, to_apply=compare, + is_stable=true + ROOT neg = s32[64,8732]{1,0} negate(sort) + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + StableSortExpander stabilizer; + EXPECT_TRUE(stabilizer.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::Negate(m::GetTupleElement( + m::Sort(m::Parameter(0), m::Iota()), 0)))); + CheckComputationHasTieBreaker( + root->operand(0)->operand(0)->to_apply()->root_instruction(), + /*iota_parameter=*/1); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc index 5516026139..6f61fc4416 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc @@ -1072,7 +1072,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) { auto keys = builder.AddInstruction( HloInstruction::CreateParameter(0, keys_shape, "keys")); TF_ASSERT_OK_AND_ASSIGN( - auto* sort, MakeSortHlo(keys_shape, {keys}, 0, &builder, module_.get())); + auto* sort, MakeSortHlo(keys_shape, {keys}, 0, /*is_stable=*/false, + &builder, module_.get())); computation_ = module_->AddEntryComputation(builder.Build()); RunAnalysis(); @@ -1094,7 +1095,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShareWithTupleUser) { TF_ASSERT_OK_AND_ASSIGN( auto* sort, MakeSortHlo(ShapeUtil::MakeTupleShape({keys_shape, values_shape}), - {keys, values}, 0, &builder, module_.get())); + {keys, values}, 0, /*is_stable=*/false, &builder, + module_.get())); computation_ = module_->AddEntryComputation(builder.Build()); RunAnalysis(); -- GitLab From 0ab7f2d663201544556f16a77df5243394b482a1 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 14 Feb 2019 02:37:34 -0800 Subject: [PATCH 127/351] Clean up DsoLoader, removing unused functionality and complexity. PiperOrigin-RevId: 233920360 --- .../platform/default/gpu/cupti_wrapper.cc | 41 ++- .../cuda/cuda_driver_wrapper.h | 32 +- .../stream_executor/cuda/cudart_stub.cc | 44 +-- .../platform/default/dso_loader.cc | 310 ++++-------------- .../platform/default/dso_loader.h | 100 ++---- 5 files changed, 150 insertions(+), 377 deletions(-) diff --git a/tensorflow/core/platform/default/gpu/cupti_wrapper.cc b/tensorflow/core/platform/default/gpu/cupti_wrapper.cc index 7ac5e5c445..481bbf9bae 100644 --- a/tensorflow/core/platform/default/gpu/cupti_wrapper.cc +++ b/tensorflow/core/platform/default/gpu/cupti_wrapper.cc @@ -28,27 +28,26 @@ namespace profiler { namespace dynload { -#define LIBCUPTI_WRAP(__name) \ - struct DynLoadShim__##__name { \ - static const char* kName; \ - using FuncPointerT = std::add_pointer::type; \ - static void* GetDsoHandle() { \ - static auto status = \ - stream_executor::internal::CachedDsoLoader::GetLibcuptiDsoHandle(); \ - return status.ValueOrDie(); \ - } \ - static FuncPointerT DynLoad() { \ - static void* f; \ - TF_CHECK_OK(::tensorflow::Env::Default()->GetSymbolFromLibrary( \ - GetDsoHandle(), kName, &f)) \ - << "could not find " << kName << "in libcupti DSO"; \ - return reinterpret_cast(f); \ - } \ - template \ - CUptiResult operator()(Args... args) { \ - return DynLoad()(args...); \ - } \ - } __name; \ +#define LIBCUPTI_WRAP(__name) \ + struct DynLoadShim__##__name { \ + static const char* kName; \ + using FuncPointerT = std::add_pointer::type; \ + template \ + CUptiResult operator()(Args... 
args) { \ + static auto fn = []() -> FuncPointerT { \ + auto handle_or = \ + stream_executor::internal::CachedDsoLoader::GetCuptiDsoHandle(); \ + if (!handle_or.ok()) return nullptr; \ + void* symbol; \ + stream_executor::port::Env::Default() \ + ->GetSymbolFromLibrary(handle_or.ValueOrDie(), kName, &symbol) \ + .IgnoreError(); \ + return reinterpret_cast(symbol); \ + }(); \ + if (fn == nullptr) return CUPTI_ERROR_UNKNOWN; \ + return fn(args...); \ + } \ + } __name; \ const char* DynLoadShim__##__name::kName = #__name; LIBCUPTI_WRAP(cuptiActivityDisable); diff --git a/tensorflow/stream_executor/cuda/cuda_driver_wrapper.h b/tensorflow/stream_executor/cuda/cuda_driver_wrapper.h index 657eea8171..0de27d5cd3 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver_wrapper.h +++ b/tensorflow/stream_executor/cuda/cuda_driver_wrapper.h @@ -44,22 +44,22 @@ namespace wrap { #define TO_STR_(x) #x #define TO_STR(x) TO_STR_(x) -#define STREAM_EXECUTOR_LIBCUDA_WRAP(cudaSymbolName) \ - template \ - auto cudaSymbolName(Args... args)->decltype(::cudaSymbolName(args...)) { \ - using FuncPtrT = std::add_pointer::type; \ - static FuncPtrT loaded = []() -> FuncPtrT { \ - static const char *kName = TO_STR(cudaSymbolName); \ - void *f; \ - auto s = stream_executor::port::Env::Default()->GetSymbolFromLibrary( \ - stream_executor::internal::CachedDsoLoader::GetLibcudaDsoHandle() \ - .ValueOrDie(), \ - kName, &f); \ - CHECK(s.ok()) << "could not find " << kName \ - << " in libcuda DSO; dlerror: " << s.error_message(); \ - return reinterpret_cast(f); \ - }(); \ - return loaded(args...); \ +#define STREAM_EXECUTOR_LIBCUDA_WRAP(cudaSymbolName) \ + template \ + auto cudaSymbolName(Args... 
args)->decltype(::cudaSymbolName(args...)) { \ + using FuncPtrT = std::add_pointer::type; \ + static FuncPtrT loaded = []() -> FuncPtrT { \ + static const char *kName = TO_STR(cudaSymbolName); \ + void *f; \ + auto s = stream_executor::port::Env::Default()->GetSymbolFromLibrary( \ + stream_executor::internal::CachedDsoLoader::GetCudaDriverDsoHandle() \ + .ValueOrDie(), \ + kName, &f); \ + CHECK(s.ok()) << "could not find " << kName \ + << " in libcuda DSO; dlerror: " << s.error_message(); \ + return reinterpret_cast(f); \ + }(); \ + return loaded(args...); \ } #endif diff --git a/tensorflow/stream_executor/cuda/cudart_stub.cc b/tensorflow/stream_executor/cuda/cudart_stub.cc index c5fc43d56c..8878700c5e 100644 --- a/tensorflow/stream_executor/cuda/cudart_stub.cc +++ b/tensorflow/stream_executor/cuda/cudart_stub.cc @@ -21,19 +21,19 @@ limitations under the License. #include "tensorflow/stream_executor/platform/dso_loader.h" namespace { -void *GetDsoHandle() { - static auto handle = [] { - void *result = nullptr; - using DsoLoader = stream_executor::internal::DsoLoader; - DsoLoader::GetLibcudartDsoHandle(&result).IgnoreError(); - return result; +void* GetDsoHandle() { + static auto handle = []() -> void* { + auto handle_or = + stream_executor::internal::DsoLoader::GetCudaRuntimeDsoHandle(); + if (!handle_or.ok()) return nullptr; + return handle_or.ValueOrDie(); }(); return handle; } template -T LoadSymbol(const char *symbol_name) { - void *symbol = nullptr; +T LoadSymbol(const char* symbol_name) { + void* symbol = nullptr; auto env = stream_executor::port::Env::Default(); env->GetSymbolFromLibrary(GetDsoHandle(), symbol_name, &symbol).IgnoreError(); return reinterpret_cast(symbol); @@ -41,28 +41,28 @@ T LoadSymbol(const char *symbol_name) { cudaError_t GetSymbolNotFoundError() { return cudaErrorSharedObjectSymbolNotFound; } -const char *GetSymbolNotFoundStrError() { +const char* GetSymbolNotFoundStrError() { return "cudaErrorSharedObjectSymbolNotFound"; } } // 
namespace // Code below is auto-generated. extern "C" { -cudaError_t CUDART_CB cudaFree(void *devPtr) { - using FuncPtr = cudaError_t (*)(void *devPtr); +cudaError_t CUDART_CB cudaFree(void* devPtr) { + using FuncPtr = cudaError_t (*)(void* devPtr); static auto func_ptr = LoadSymbol("cudaFree"); if (!func_ptr) return GetSymbolNotFoundError(); return func_ptr(devPtr); } -cudaError_t CUDART_CB cudaGetDevice(int *device) { - using FuncPtr = cudaError_t (*)(int *device); +cudaError_t CUDART_CB cudaGetDevice(int* device) { + using FuncPtr = cudaError_t (*)(int* device); static auto func_ptr = LoadSymbol("cudaGetDevice"); if (!func_ptr) return GetSymbolNotFoundError(); return func_ptr(device); } -cudaError_t CUDART_CB cudaGetDeviceProperties(cudaDeviceProp *prop, +cudaError_t CUDART_CB cudaGetDeviceProperties(cudaDeviceProp* prop, int device) { using FuncPtr = cudaError_t (*)(cudaDeviceProp * prop, int device); static auto func_ptr = LoadSymbol("cudaGetDeviceProperties"); @@ -70,8 +70,8 @@ cudaError_t CUDART_CB cudaGetDeviceProperties(cudaDeviceProp *prop, return func_ptr(prop, device); } -const char *CUDART_CB cudaGetErrorString(cudaError_t error) { - using FuncPtr = const char *(*)(cudaError_t error); +const char* CUDART_CB cudaGetErrorString(cudaError_t error) { + using FuncPtr = const char* (*)(cudaError_t error); static auto func_ptr = LoadSymbol("cudaGetErrorString"); if (!func_ptr) return GetSymbolNotFoundStrError(); return func_ptr(error); @@ -86,27 +86,27 @@ cudaError_t CUDART_CB cudaSetDevice(int device) { cudaError_t CUDART_CB cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, + void* userData, unsigned int flags) { using FuncPtr = cudaError_t (*)(cudaStream_t stream, cudaStreamCallback_t callback, - void *userData, unsigned int flags); + void* userData, unsigned int flags); static auto func_ptr = LoadSymbol("cudaStreamAddCallback"); if (!func_ptr) return GetSymbolNotFoundError(); return func_ptr(stream, callback, 
userData, flags); } -cudaError_t CUDART_CB cudaGetDeviceCount(int *count) { - using FuncPtr = cudaError_t (*)(int *count); +cudaError_t CUDART_CB cudaGetDeviceCount(int* count) { + using FuncPtr = cudaError_t (*)(int* count); static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); if (!func_ptr) return GetSymbolNotFoundError(); return func_ptr(count); } cudaError_t CUDART_CB cudaPointerGetAttributes( - struct cudaPointerAttributes *attributes, const void *ptr) { + struct cudaPointerAttributes* attributes, const void* ptr) { using FuncPtr = cudaError_t (*)(struct cudaPointerAttributes * attributes, - const void *ptr); + const void* ptr); static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); if (!func_ptr) return GetSymbolNotFoundError(); return func_ptr(attributes, ptr); diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc index 8592455860..ad8112b831 100644 --- a/tensorflow/stream_executor/platform/default/dso_loader.cc +++ b/tensorflow/stream_executor/platform/default/dso_loader.cc @@ -12,298 +12,130 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/stream_executor/platform/default/dso_loader.h" -// TODO(jhen): Replace hardcoded, platform specific path strings in GetXXXPath() -// with a function in e.g. cuda.h. 
- -#include #include -#include -#include #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "cuda/cuda_config.h" #include "tensorflow/core/platform/load_library.h" #include "tensorflow/stream_executor/lib/env.h" #include "tensorflow/stream_executor/lib/error.h" #include "tensorflow/stream_executor/lib/path.h" -#include "tensorflow/stream_executor/lib/str_util.h" -#include "tensorflow/stream_executor/lib/stringprintf.h" -#include "tensorflow/stream_executor/platform/default/dso_loader.h" #include "tensorflow/stream_executor/platform/logging.h" #include "tensorflow/stream_executor/platform/port.h" -#if !defined(PLATFORM_GOOGLE) -#include "absl/strings/string_view.h" -#include "cuda/cuda_config.h" -#endif - namespace stream_executor { namespace internal { +namespace { string GetCudaVersion() { return TF_CUDA_VERSION; } string GetCudnnVersion() { return TF_CUDNN_VERSION; } -/* static */ port::Status DsoLoader::GetCublasDsoHandle(void** dso_handle) { - return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName( - "cublas", GetCudaVersion()), - GetCudaLibraryDirPath()), - dso_handle); -} - -/* static */ port::Status DsoLoader::GetCudnnDsoHandle(void** dso_handle) { - // libcudnn is versioned differently than the other libraries and may have a - // different version number than other CUDA libraries. See b/22397368 for - // some details about the complications surrounding this. 
- return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName( - "cudnn", GetCudnnVersion()), - GetCudaLibraryDirPath()), - dso_handle); -} - -/* static */ port::Status DsoLoader::GetCufftDsoHandle(void** dso_handle) { - return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName( - "cufft", GetCudaVersion()), - GetCudaLibraryDirPath()), - dso_handle); -} - -/* static */ port::Status DsoLoader::GetCurandDsoHandle(void** dso_handle) { - return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName( - "curand", GetCudaVersion()), - GetCudaLibraryDirPath()), - dso_handle); -} - -/* static */ port::Status DsoLoader::GetLibcudaDsoHandle(void** dso_handle) { -#if defined(PLATFORM_WINDOWS) - return GetDsoHandle( - FindDsoPath(port::Env::Default()->FormatLibraryFileName("nvcuda", ""), - GetCudaDriverLibraryPath()), - dso_handle); -#else - port::Status status = GetDsoHandle( - FindDsoPath(port::Env::Default()->FormatLibraryFileName("cuda", "1"), - GetCudaDriverLibraryPath()), - dso_handle); -#if defined(__APPLE__) - // On Mac OS X, CUDA sometimes installs libcuda.dylib instead of - // libcuda.1.dylib. - return status.ok() - ? status - : GetDsoHandle( - FindDsoPath( - port::Env::Default()->FormatLibraryFileName("cuda", ""), - GetCudaDriverLibraryPath()), - dso_handle); -#else - return status; -#endif -#endif -} - -/* static */ port::Status DsoLoader::GetLibcuptiDsoHandle(void** dso_handle) { -#if defined(ANDROID_TEGRA) - // On Android devices the CUDA version number is not added to the library - // name. 
- return GetDsoHandle( - FindDsoPath(port::Env::Default()->FormatLibraryFileName("cupti", ""), - GetCudaCuptiLibraryPath()), - dso_handle); -#else - return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName( - "cupti", GetCudaVersion()), - GetCudaCuptiLibraryPath()), - dso_handle); -#endif -} - -/* static */ port::Status DsoLoader::GetLibcudartDsoHandle(void** dso_handle) { - return GetDsoHandle(FindDsoPath(port::Env::Default()->FormatLibraryFileName( - "cudart", GetCudaVersion()), - GetCudaLibraryDirPath()), - dso_handle); -} - -static mutex& GetRpathMutex() { - static mutex* mu = new mutex; - return *mu; -} - -/* static */ void DsoLoader::RegisterRpath(absl::string_view path) { - mutex_lock lock{GetRpathMutex()}; - GetRpaths()->emplace_back(path); -} - -/* static */ port::Status DsoLoader::GetDsoHandle(absl::string_view path, - void** dso_handle, - LoadKind load_kind) { - if (load_kind != LoadKind::kLocal) { - return port::Status(port::error::INVALID_ARGUMENT, - "Only LoadKind::kLocal is currently supported"); +port::StatusOr GetDsoHandle(const string& name, const string& version) { + auto filename = port::Env::Default()->FormatLibraryFileName(name, version); + void* dso_handle; + port::Status status = + port::Env::Default()->LoadLibrary(filename.c_str(), &dso_handle); + if (status.ok()) { + LOG(INFO) << "Successfully opened CUDA library " << filename; + return dso_handle; } - string path_string(path); - port::Status s = - port::Env::Default()->LoadLibrary(path_string.c_str(), dso_handle); - if (!s.ok()) { -#if !defined(PLATFORM_WINDOWS) - char* ld_library_path = getenv("LD_LIBRARY_PATH"); -#endif - LOG(INFO) << "Couldn't open CUDA library " << path + + auto message = absl::StrCat("Could not dlopen library '", filename, + "'; dlerror: ", status.error_message()); #if !defined(PLATFORM_WINDOWS) - << ". LD_LIBRARY_PATH: " - << (ld_library_path != nullptr ? 
ld_library_path : "") -#endif - ; - return port::Status(port::error::FAILED_PRECONDITION, - absl::StrCat("could not dlopen DSO: ", path, - "; dlerror: ", s.error_message())); + if (const char* ld_library_path = getenv("LD_LIBRARY_PATH")) { + message += absl::StrCat("; LD_LIRARY_PATH: ", ld_library_path); } - LOG(INFO) << "successfully opened CUDA library " << path << " locally"; - return port::Status::OK(); -} - -/* static */ string DsoLoader::GetBinaryDirectory(bool strip_executable_name) { - string exe_path = port::Env::Default()->GetExecutablePath(); - return strip_executable_name ? string(port::Dirname(exe_path)) : exe_path; -} - -// Creates a heap-allocated vector for initial rpaths. -// Ownership is transferred to the caller. -static std::vector* CreatePrimordialRpaths() { - auto rpaths = new std::vector; -#if defined(__APPLE__) - rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib"); -#else - rpaths->push_back("driver/driver_sh.runfiles/local_config_cuda/cuda/lib64"); #endif - return rpaths; -} - -/* static */ std::vector* DsoLoader::GetRpaths() { - static std::vector* rpaths = CreatePrimordialRpaths(); - return rpaths; + LOG(INFO) << message; + return port::Status(port::error::FAILED_PRECONDITION, message); } +} // namespace -/* static */ bool DsoLoader::TrySymbolicDereference(string* candidate) { +namespace DsoLoader { +port::StatusOr GetCudaDriverDsoHandle() { #if defined(PLATFORM_WINDOWS) - return false; -#else - char buf[PATH_MAX]; - char* result = realpath(candidate->c_str(), buf); - if (result == nullptr) { - return false; + return GetDsoHandle("nvcuda", ""); +#elif defined(__APPLE__) + // On Mac OS X, CUDA sometimes installs libcuda.dylib instead of + // libcuda.1.dylib. 
+ auto handle_or = GetDsoHandle("cuda", ""); + if (handle_or.ok()) { + return handle_or; } - VLOG(3) << "realpath resolved candidate path \"" << *candidate << "\" to \"" - << result << "\""; - *candidate = result; - return true; #endif + return GetDsoHandle("cuda", "1"); } -/* static */ string DsoLoader::FindDsoPath(absl::string_view library_name, - absl::string_view runfiles_relpath) { - // Keep a record of the paths we attempted so we can dump out meaningful - // diagnostics if no path is found. - std::vector attempted; - - using StringPieces = std::vector; - string candidate; - - // Otherwise, try binary-plus-rpath locations. - string binary_directory = - GetBinaryDirectory(true /* = strip_executable_name */); - mutex_lock lock{GetRpathMutex()}; - for (const string& rpath : *GetRpaths()) { - candidate = - port::Join(StringPieces{binary_directory, rpath, library_name}, "/"); - if (TrySymbolicDereference(&candidate)) { - return candidate; - } - } - attempted.push_back(candidate); +port::StatusOr GetCudaRuntimeDsoHandle() { + return GetDsoHandle("cudart", GetCudaVersion()); +} - return string(library_name); +port::StatusOr GetCublasDsoHandle() { + return GetDsoHandle("cublas", GetCudaVersion()); } -/* static */ string DsoLoader::GetCudaLibraryDirPath() { -#if defined(__APPLE__) - return "external/local_config_cuda/cuda/lib"; -#else - return "external/local_config_cuda/cuda/lib64"; -#endif +port::StatusOr GetCufftDsoHandle() { + return GetDsoHandle("cufft", GetCudaVersion()); } -/* static */ string DsoLoader::GetCudaDriverLibraryPath() { -#if defined(__APPLE__) - return "external/local_config_cuda/cuda/driver/lib"; -#elif defined(PLATFORM_WINDOWS) - return ""; -#else - return "external/local_config_cuda/cuda/driver/lib64"; -#endif +port::StatusOr GetCurandDsoHandle() { + return GetDsoHandle("curand", GetCudaVersion()); } -/* static */ string DsoLoader::GetCudaCuptiLibraryPath() { -#if defined(__APPLE__) - return "external/local_config_cuda/cuda/extras/CUPTI/lib"; 
+port::StatusOr GetCuptiDsoHandle() { +#if defined(ANDROID_TEGRA) + // On Android devices the CUDA version number is not added to the library + // name. + return GetDsoHandle("cupti", ""); #else - return "external/local_config_cuda/cuda/extras/CUPTI/lib64"; + return GetDsoHandle("cupti", GetCudaVersion()); #endif } -// -- CachedDsoLoader - -/* static */ port::StatusOr CachedDsoLoader::GetCublasDsoHandle() { - static port::StatusOr result = - FetchHandleResult(DsoLoader::GetCublasDsoHandle); - return result; +port::StatusOr GetCudnnDsoHandle() { + return GetDsoHandle("cudnn", GetCudnnVersion()); } +} // namespace DsoLoader -/* static */ port::StatusOr CachedDsoLoader::GetCurandDsoHandle() { - static port::StatusOr result = - FetchHandleResult(DsoLoader::GetCurandDsoHandle); - return result; +namespace CachedDsoLoader { +port::StatusOr GetCudaDriverDsoHandle() { + static auto result = new auto(DsoLoader::GetCudaDriverDsoHandle()); + return *result; } -/* static */ port::StatusOr CachedDsoLoader::GetCudnnDsoHandle() { - static port::StatusOr result = - FetchHandleResult(DsoLoader::GetCudnnDsoHandle); - return result; +port::StatusOr GetCudaRuntimeDsoHandle() { + static auto result = new auto(DsoLoader::GetCudaRuntimeDsoHandle()); + return *result; } -/* static */ port::StatusOr CachedDsoLoader::GetCufftDsoHandle() { - static port::StatusOr result = - FetchHandleResult(DsoLoader::GetCufftDsoHandle); - return result; +port::StatusOr GetCublasDsoHandle() { + static auto result = new auto(DsoLoader::GetCublasDsoHandle()); + return *result; } -/* static */ port::StatusOr CachedDsoLoader::GetLibcudaDsoHandle() { - static port::StatusOr result = - FetchHandleResult(DsoLoader::GetLibcudaDsoHandle); - return result; +port::StatusOr GetCurandDsoHandle() { + static auto result = new auto(DsoLoader::GetCurandDsoHandle()); + return *result; } -/* static */ port::StatusOr CachedDsoLoader::GetLibcuptiDsoHandle() { - static port::StatusOr result = - 
FetchHandleResult(DsoLoader::GetLibcuptiDsoHandle); - return result; +port::StatusOr GetCufftDsoHandle() { + static auto result = new auto(DsoLoader::GetCufftDsoHandle()); + return *result; } -/* static */ port::StatusOr CachedDsoLoader::GetLibcudartDsoHandle() { - static port::StatusOr result = - FetchHandleResult(DsoLoader::GetLibcudartDsoHandle); - return result; +port::StatusOr GetCuptiDsoHandle() { + static auto result = new auto(DsoLoader::GetCuptiDsoHandle()); + return *result; } -/* static */ port::StatusOr CachedDsoLoader::FetchHandleResult( - std::function load_dso) { - void* handle; - auto status = load_dso(&handle); - if (!status.ok()) { - return status; - } - return handle; +port::StatusOr GetCudnnDsoHandle() { + static auto result = new auto(DsoLoader::GetCudnnDsoHandle()); + return *result; } - +} // namespace CachedDsoLoader } // namespace internal } // namespace stream_executor diff --git a/tensorflow/stream_executor/platform/default/dso_loader.h b/tensorflow/stream_executor/platform/default/dso_loader.h index 92c0db7037..45a8315b43 100644 --- a/tensorflow/stream_executor/platform/default/dso_loader.h +++ b/tensorflow/stream_executor/platform/default/dso_loader.h @@ -31,88 +31,30 @@ limitations under the License. namespace stream_executor { namespace internal { -// Permits StreamExecutor code to dynamically load a pre-determined set of -// relevant DSOs via dlopen. -// -// Thread-safe. -class DsoLoader { - public: - // The following methods either load the DSO of interest and return a dlopen - // handle or error status in the canonical namespace. 
- - static port::Status GetCublasDsoHandle(void** dso_handle); - static port::Status GetCudnnDsoHandle(void** dso_handle); - static port::Status GetCufftDsoHandle(void** dso_handle); - static port::Status GetCurandDsoHandle(void** dso_handle); - static port::Status GetLibcudaDsoHandle(void** dso_handle); - static port::Status GetLibcuptiDsoHandle(void** dso_handle); - static port::Status GetLibcudartDsoHandle(void** dso_handle); - - // Registers a new binary-relative path to use as a dlopen search path. - static void RegisterRpath(absl::string_view path); - - private: - // Registered rpaths (singleton vector) and a mutex that guards it. - static std::vector* GetRpaths(); - - // Descriptive boolean wrapper to indicate whether symbols are made available - // to resolve in later-loaded libraries. - enum class LoadKind { kLocal, kGlobal }; - - // Loads a DSO from the given "path" (which can technically be any dlopen-able - // name). If the load kind is global, the symbols in the loaded DSO are - // visible to subsequent DSO loading operations. - static port::Status GetDsoHandle(absl::string_view path, void** dso_handle, - LoadKind load_kind = LoadKind::kLocal); - - // Returns the binary directory (or binary path) associated with the currently - // executing program. If strip_executable_name is true, the executable file is - // stripped off of the path. - static string GetBinaryDirectory(bool strip_executable_name); - - // Invokes realpath on the original path; updates candidate and returns true - // if it succeeds (i.e. a file exists at the path); otherwise, returns false. - static bool TrySymbolicDereference(string* candidate); - - // Attempts to find a path to the DSO of interest, otherwise returns the - // bare library name: - // Arguments: - // library_name: the filename in tree; e.g. libOpenCL.so.1.0.0 - // runfiles_relpath: where to look for the library relative to the runfiles - // root; e.g. 
third_party/gpus/cuda/lib64 - static string FindDsoPath(absl::string_view library_name, - absl::string_view runfiles_relpath); - - // Return platform dependent paths for DSOs - static string GetCudaLibraryDirPath(); - static string GetCudaDriverLibraryPath(); - static string GetCudaCuptiLibraryPath(); - - SE_DISALLOW_COPY_AND_ASSIGN(DsoLoader); -}; +namespace DsoLoader { +// The following methods either load the DSO of interest and return a dlopen +// handle or error status. +port::StatusOr GetCudaDriverDsoHandle(); +port::StatusOr GetCudaRuntimeDsoHandle(); +port::StatusOr GetCublasDsoHandle(); +port::StatusOr GetCufftDsoHandle(); +port::StatusOr GetCurandDsoHandle(); +port::StatusOr GetCuptiDsoHandle(); +port::StatusOr GetCudnnDsoHandle(); +} // namespace DsoLoader // Wrapper around the DsoLoader that prevents us from dlopen'ing any of the DSOs // more than once. -class CachedDsoLoader { - public: - // Cached versions of the corresponding DsoLoader methods above. - static port::StatusOr GetCublasDsoHandle(); - static port::StatusOr GetCudnnDsoHandle(); - static port::StatusOr GetCufftDsoHandle(); - static port::StatusOr GetCurandDsoHandle(); - static port::StatusOr GetLibcudaDsoHandle(); - static port::StatusOr GetLibcuptiDsoHandle(); - static port::StatusOr GetLibcudartDsoHandle(); - - private: - // Fetches a DSO handle via "load_dso" and returns the StatusOr form of the - // result. - static port::StatusOr FetchHandleResult( - std::function load_dso); - - SE_DISALLOW_COPY_AND_ASSIGN(CachedDsoLoader); -}; - +namespace CachedDsoLoader { +// Cached versions of the corresponding DsoLoader methods above. 
+port::StatusOr GetCudaDriverDsoHandle(); +port::StatusOr GetCudaRuntimeDsoHandle(); +port::StatusOr GetCublasDsoHandle(); +port::StatusOr GetCufftDsoHandle(); +port::StatusOr GetCurandDsoHandle(); +port::StatusOr GetCuptiDsoHandle(); +port::StatusOr GetCudnnDsoHandle(); +} // namespace CachedDsoLoader } // namespace internal } // namespace stream_executor -- GitLab From 183ae6a8ca914d5f358e12d4c66f3f5eb0c0d934 Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Thu, 14 Feb 2019 02:51:01 -0800 Subject: [PATCH 128/351] Export `tf.experimental.Module` as `tf.Module`. PiperOrigin-RevId: 233921773 --- tensorflow/python/module/module.py | 10 +++--- .../api/golden/v1/tensorflow.-module.pbtxt | 35 +++++++++++++++++++ .../tools/api/golden/v1/tensorflow.pbtxt | 4 +++ .../api/golden/v2/tensorflow.-module.pbtxt | 35 +++++++++++++++++++ .../tools/api/golden/v2/tensorflow.pbtxt | 4 +++ 5 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 tensorflow/tools/api/golden/v1/tensorflow.-module.pbtxt create mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-module.pbtxt diff --git a/tensorflow/python/module/module.py b/tensorflow/python/module/module.py index 0905df779f..5fe7644ee3 100644 --- a/tensorflow/python/module/module.py +++ b/tensorflow/python/module/module.py @@ -143,7 +143,7 @@ def with_name_scope(unbound_method): wrap_with_name_scope(unbound_method)) -@tf_export("experimental.Module") +@tf_export("Module", "experimental.Module") class Module(six.with_metaclass(ModuleMetaclass, tracking.AutoCheckpointable)): """Base neural network module class. @@ -256,9 +256,9 @@ class Module(six.with_metaclass(ModuleMetaclass, tracking.AutoCheckpointable)): Submodules are modules which are properties of this module, or found as properties of modules which are properties of this module (and so on). 
- >>> a = tf.experimental.Module() - >>> b = tf.experimental.Module() - >>> c = tf.experimental.Module() + >>> a = tf.Module() + >>> b = tf.Module() + >>> c = tf.Module() >>> a.b = b >>> b.c = c >>> assert list(a.submodules) == [b, c] @@ -283,7 +283,7 @@ class Module(six.with_metaclass(ModuleMetaclass, tracking.AutoCheckpointable)): flattened to find leaves. Finally every leaf value is optionally tested against the given `predicate` and finally yielded. - >>> class Foo(tf.experimental.Module): + >>> class Foo(tf.Module): ... def __init__(self): ... super(Foo, self).__init__() ... self.x = [tf.constant('a'), tf.constant('b')] diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-module.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-module.pbtxt new file mode 100644 index 0000000000..973c7b0504 --- /dev/null +++ b/tensorflow/tools/api/golden/v1/tensorflow.-module.pbtxt @@ -0,0 +1,35 @@ +path: "tensorflow.Module" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "no_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index cb9d6a907f..103fdd0c1a 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -140,6 +140,10 @@ tf_module { name: "MetaGraphDef" mtype: "" } + member { + name: "Module" + mtype: "" + } member { name: "NameAttrList" mtype: "" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-module.pbtxt 
b/tensorflow/tools/api/golden/v2/tensorflow.-module.pbtxt new file mode 100644 index 0000000000..973c7b0504 --- /dev/null +++ b/tensorflow/tools/api/golden/v2/tensorflow.-module.pbtxt @@ -0,0 +1,35 @@ +path: "tensorflow.Module" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "name" + mtype: "" + } + member { + name: "name_scope" + mtype: "" + } + member { + name: "submodules" + mtype: "" + } + member { + name: "trainable_variables" + mtype: "" + } + member { + name: "variables" + mtype: "" + } + member_method { + name: "__init__" + argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } + member_method { + name: "no_name_scope" + argspec: "args=[\'cls\', \'method\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index 2a7813da14..d26406df47 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -24,6 +24,10 @@ tf_module { name: "IndexedSlices" mtype: "" } + member { + name: "Module" + mtype: "" + } member { name: "Operation" mtype: "" -- GitLab From a56838f7ca0e006c7fa7cbbdfa7eba80a3a53345 Mon Sep 17 00:00:00 2001 From: Amit <30853054+amitsrivastava78@users.noreply.github.com> Date: Thu, 14 Feb 2019 18:37:33 +0530 Subject: [PATCH 129/351] Updated export.cc Fixed some Typo errors --- tensorflow/lite/toco/tflite/export.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/lite/toco/tflite/export.cc b/tensorflow/lite/toco/tflite/export.cc index 53f5ecef87..46fc849612 100644 --- a/tensorflow/lite/toco/tflite/export.cc +++ b/tensorflow/lite/toco/tflite/export.cc @@ -63,12 +63,12 @@ bool IsControlFlowOp(const string& tensorflow_op) { return false; } -// Check if a TensorFlow Op is unsupportred by the Flex runtime. 
+// Check if a TensorFlow Op is unsupported by the Flex runtime. bool IsUnsupportedFlexOp(const string& tensorflow_op) { if (IsControlFlowOp(tensorflow_op)) { return true; } - // `HashTableV2` isn't supported for now since it requires an additinonal + // `HashTableV2` isn't supported for now since it requires an additional // initialization step. // TODO(b/117651199): Support `HashTableV2` with Flex runtime. if (tensorflow_op == "HashTableV2") { @@ -157,7 +157,7 @@ OperatorKey::OperatorKey( string(::tflite::kFlexCustomCodePrefix) + flex_tensorflow_op_; } else { // If Flex is disabled or the original TensorFlow NodeDef isn't available, - // we produce a custom op. This gives developers a chance to implemenr + // we produce a custom op. This gives developers a chance to implement // custom ops. custom_code_ = name; } -- GitLab From dc7c7ef544d0bff5033e0dbf8364a021a19306ef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 05:41:30 -0800 Subject: [PATCH 130/351] internal change PiperOrigin-RevId: 233938604 --- tensorflow/compiler/xla/tests/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index db1c927469..a67aa6ebfe 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1146,7 +1146,7 @@ xla_test( xla_test( name = "reduce_test", srcs = ["reduce_test.cc"], - shard_count = 40, + shard_count = 31, tags = [ "optonly", ], -- GitLab From b83f6be624fa8f66406cdc6823467a4754cdf932 Mon Sep 17 00:00:00 2001 From: Tamara Norman Date: Thu, 14 Feb 2019 06:07:47 -0800 Subject: [PATCH 131/351] Remove nn.*_backprop_* ops for TF2 PiperOrigin-RevId: 233941605 --- .../api_def_Conv3DBackpropFilterV2.pbtxt | 1 + tensorflow/python/ops/nn_ops.py | 128 ------------------ .../tools/api/golden/v2/tensorflow.nn.pbtxt | 12 -- tensorflow/tools/compatibility/renames_v2.py | 4 +- tensorflow/tools/compatibility/reorders_v2.py | 1 - 
.../tools/compatibility/tf_upgrade_v2.py | 24 ++-- .../tools/compatibility/tf_upgrade_v2_test.py | 9 +- 7 files changed, 19 insertions(+), 160 deletions(-) diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt index 590b37c95f..edbcba26ce 100644 --- a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt +++ b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt @@ -2,6 +2,7 @@ op { graph_op_name: "Conv3DBackpropFilterV2" endpoint { name: "nn.conv3d_backprop_filter" + deprecation_version: 2 } endpoint { name: "nn.conv3d_backprop_filter_v2" diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index cf4aa51b6e..031425a579 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1708,70 +1708,6 @@ def conv2d( # pylint: disable=redefined-builtin,dangerous-default-value name=name) -@tf_export("nn.conv2d_backprop_filter", v1=[]) -def conv2d_backprop_filter_v2(input, # pylint: disable=redefined-builtin - filter_sizes, - out_backprop, - strides, - padding, - data_format="NHWC", - dilations=None, - name=None): - r"""Computes the gradients of convolution with respect to the filter. - - Args: - input: A `Tensor`. Must be one of the following types: - `half`, `bfloat16`, `float32`, `float64`. - 4-D with shape `[batch, in_height, in_width, in_channels]`. - filter_sizes: A `Tensor` of type `int32`. - An integer vector representing the tensor shape of `filter`, - where `filter` is a 4-D - `[filter_height, filter_width, in_channels, out_channels]` tensor. - out_backprop: A `Tensor`. Must have the same type as `input`. - 4-D with shape `[batch, out_height, out_width, out_channels]`. - Gradients w.r.t. the output of the convolution. - strides: A list of `ints`. - The stride of the sliding window for each dimension of the input - of the convolution. 
Must be in the same order as the dimension specified - with format. - padding: Either the `string `"SAME"` or `"VALID"` indicating the type of - padding algorithm to use, or a list indicating the explicit paddings at - the start and end of each dimension. When explicit padding is used and - data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, - pad_bottom], [pad_left, pad_right], [0, 0]]`. When explicit padding used - and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], - [pad_top, pad_bottom], [pad_left, pad_right]]`. - data_format: An optional `string` from: `"NHWC", "NCHW"`. - Defaults to `"NHWC"`. - Specify the data format of the input and output data. With the - default format "NHWC", the data is stored in the order of: - [batch, in_height, in_width, in_channels]. - Alternatively, the format could be "NCHW", the data storage order of: - [batch, in_channels, in_height, in_width]. - dilations: An optional list of `ints`. Defaults to `[1, 1, 1, 1]`. - 1-D tensor of length 4. The dilation factor for each dimension of - `input`. If set to k > 1, there will be k-1 skipped cells between each - filter element on that dimension. The dimension order is determined by - the value of `data_format`, see above for details. Dilations in the batch - and depth dimensions must be 1. - name: A name for the operation (optional). - - Returns: - A `Tensor`. Has the same type as `input`. 
- """ - if dilations is None: - dilations = [1, 1, 1, 1] - return conv2d_backprop_filter(input, # pylint: disable=redefined-builtin - filter_sizes, - out_backprop, - strides, - padding, - use_cudnn_on_gpu=True, - data_format=data_format, - dilations=dilations, - name=name) - - @tf_export(v1=["nn.conv2d_backprop_filter"]) def conv2d_backprop_filter( # pylint: disable=redefined-builtin,dangerous-default-value input, @@ -1832,70 +1768,6 @@ def conv2d_backprop_filter( # pylint: disable=redefined-builtin,dangerous-defau explicit_paddings, data_format, dilations, name) -@tf_export("nn.conv2d_backprop_input", v1=[]) -def conv2d_backprop_input_v2(input_sizes, - filters, - out_backprop, - strides, - padding, - data_format="NHWC", - dilations=None, - name=None): - r"""Computes the gradients of convolution with respect to the input. - - Args: - input_sizes: A `Tensor` of type `int32`. - An integer vector representing the shape of `input`, - where `input` is a 4-D `[batch, height, width, channels]` tensor. - filters: A `Tensor`. Must be one of the following types: - `half`, `bfloat16`, `float32`, `float64`. - 4-D with shape - `[filter_height, filter_width, in_channels, out_channels]`. - out_backprop: A `Tensor`. Must have the same type as `filters`. - 4-D with shape `[batch, out_height, out_width, out_channels]`. - Gradients w.r.t. the output of the convolution. - strides: A list of `ints`. - The stride of the sliding window for each dimension of the input - of the convolution. Must be in the same order as the dimension specified - with format. - padding: Either the `string `"SAME"` or `"VALID"` indicating the type of - padding algorithm to use, or a list indicating the explicit paddings at - the start and end of each dimension. When explicit padding is used and - data_format is `"NHWC"`, this should be in the form `[[0, 0], [pad_top, - pad_bottom], [pad_left, pad_right], [0, 0]]`. 
When explicit padding used - and data_format is `"NCHW"`, this should be in the form `[[0, 0], [0, 0], - [pad_top, pad_bottom], [pad_left, pad_right]]`. - data_format: An optional `string` from: `"NHWC", "NCHW"`. - Defaults to `"NHWC"`. - Specify the data format of the input and output data. With the - default format "NHWC", the data is stored in the order of: - [batch, in_height, in_width, in_channels]. - Alternatively, the format could be "NCHW", the data storage order of: - [batch, in_channels, in_height, in_width]. - dilations: An optional list of `ints`. Defaults to `[1, 1, 1, 1]`. - 1-D tensor of length 4. The dilation factor for each dimension of - `input`. If set to k > 1, there will be k-1 skipped cells between each - filter element on that dimension. The dimension order is determined by - the value of `data_format`, see above for details. Dilations in the batch - and depth dimensions must be 1. - name: A name for the operation (optional). - - Returns: - A `Tensor`. Has the same type as `filters`. 
- """ - if dilations is None: - dilations = [1, 1, 1, 1] - return conv2d_backprop_input(input_sizes, - filters, - out_backprop, - strides, - padding, - use_cudnn_on_gpu=True, - data_format=data_format, - dilations=dilations, - name=name) - - @tf_export(v1=["nn.conv2d_backprop_input"]) def conv2d_backprop_input( # pylint: disable=redefined-builtin,dangerous-default-value input_sizes, diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt index 24ed1d0e43..946ca19706 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.nn.pbtxt @@ -60,14 +60,6 @@ tf_module { name: "conv2d" argspec: "args=[\'input\', \'filters\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\', \'None\'], " } - member_method { - name: "conv2d_backprop_filter" - argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\', \'None\'], " - } - member_method { - name: "conv2d_backprop_input" - argspec: "args=[\'input_sizes\', \'filters\', \'out_backprop\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NHWC\', \'None\', \'None\'], " - } member_method { name: "conv2d_transpose" argspec: "args=[\'input\', \'filters\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'SAME\', \'NHWC\', \'None\', \'None\'], " @@ -76,10 +68,6 @@ tf_module { name: "conv3d" argspec: "args=[\'input\', \'filters\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'None\', \'None\'], " } - member_method { - name: "conv3d_backprop_filter" - argspec: "args=[\'input\', \'filter_sizes\', \'out_backprop\', 
\'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'NDHWC\', \'[1, 1, 1, 1, 1]\', \'None\'], " - } member_method { name: "conv3d_transpose" argspec: "args=[\'input\', \'filters\', \'output_shape\', \'strides\', \'padding\', \'data_format\', \'dilations\', \'name\'], varargs=None, keywords=None, defaults=[\'SAME\', \'NDHWC\', \'None\', \'None\'], " diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py index 32501811a9..3f8372b51d 100644 --- a/tensorflow/tools/compatibility/renames_v2.py +++ b/tensorflow/tools/compatibility/renames_v2.py @@ -423,7 +423,9 @@ renames = { 'tf.model_variables': 'tf.compat.v1.model_variables', 'tf.moving_average_variables': 'tf.compat.v1.moving_average_variables', 'tf.nn.bidirectional_dynamic_rnn': 'tf.compat.v1.nn.bidirectional_dynamic_rnn', - 'tf.nn.conv3d_backprop_filter_v2': 'tf.nn.conv3d_backprop_filter', + 'tf.nn.conv2d_backprop_filter': 'tf.compat.v1.nn.conv2d_backprop_filter', + 'tf.nn.conv3d_backprop_filter': 'tf.compat.v1.nn.conv3d_backprop_filter', + 'tf.nn.conv3d_backprop_filter_v2': 'tf.compat.v1.nn.conv3d_backprop_filter_v2', 'tf.nn.ctc_beam_search_decoder_v2': 'tf.nn.ctc_beam_search_decoder', 'tf.nn.ctc_loss_v2': 'tf.nn.ctc_loss', 'tf.nn.depthwise_conv2d_native': 'tf.compat.v1.nn.depthwise_conv2d_native', diff --git a/tensorflow/tools/compatibility/reorders_v2.py b/tensorflow/tools/compatibility/reorders_v2.py index 8acd17d73f..3e897e22d7 100644 --- a/tensorflow/tools/compatibility/reorders_v2.py +++ b/tensorflow/tools/compatibility/reorders_v2.py @@ -60,7 +60,6 @@ reorders = { 'tf.multinomial': ['logits', 'num_samples', 'seed', 'name', 'output_dtype'], 'tf.nn.conv1d': ['value', 'filters', 'stride', 'padding', 'use_cudnn_on_gpu', 'data_format', 'name', 'input', 'dilations'], 'tf.nn.conv2d': ['input', 'filter', 'strides', 'padding', 'use_cudnn_on_gpu', 'data_format', 'dilations', 'name', 'filters'], - 
'tf.nn.conv2d_backprop_filter': ['input', 'filter_sizes', 'out_backprop', 'strides', 'padding', 'use_cudnn_on_gpu', 'data_format', 'dilations', 'name'], 'tf.nn.conv2d_backprop_input': ['input_sizes', 'filter', 'out_backprop', 'strides', 'padding', 'use_cudnn_on_gpu', 'data_format', 'dilations', 'name', 'filters'], 'tf.nn.convolution': ['input', 'filter', 'padding', 'strides', 'dilation_rate', 'name', 'data_format', 'filters', 'dilations'], 'tf.nn.crelu': ['features', 'name', 'axis'], diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py index 25b320b982..c2d7ab65f5 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py @@ -404,12 +404,11 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): "filter": "filters", "use_cudnn_on_gpu": None, }, - "tf.nn.conv2d_backprop_filter": { - "use_cudnn_on_gpu": None, - }, "tf.nn.conv2d_backprop_input": { - "filter": "filters", "use_cudnn_on_gpu": None, + "input_sizes": "output_shape", + "out_backprop": "input", + "filter": "filters", }, "tf.contrib.summary.audio": { "family": None, @@ -729,18 +728,18 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): "tf.compat.v1.debugging.assert_rank_in", "tf.assert_rank": "tf.compat.v1.assert_rank", - "tf.contrib.framework.argsort": - "tf.argsort", "tf.nn.max_pool": "tf.nn.max_pool2d", - 'tf.keras.initializers.zeros': - 'tf.compat.v1.keras.initializers.zeros', - 'tf.keras.initializers.ones': - 'tf.compat.v1.keras.initializers.ones', - 'tf.keras.initializers.constant': - 'tf.compat.v1.keras.initializers.constant', + "tf.keras.initializers.zeros": + "tf.compat.v1.keras.initializers.zeros", + "tf.keras.initializers.ones": + "tf.compat.v1.keras.initializers.ones", + "tf.keras.initializers.constant": + "tf.compat.v1.keras.initializers.constant", "tf.data.experimental.map_and_batch_with_legacy_function": "tf.compat.v1.data.experimental.map_and_batch_with_legacy_function", + 
"tf.nn.conv2d_backprop_input": + "tf.nn.conv2d_transpose" } # pylint: enable=line-too-long @@ -774,7 +773,6 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): "tf.convert_to_tensor", "tf.nn.conv1d", "tf.nn.conv2d", - "tf.nn.conv2d_backprop_filter", "tf.nn.conv2d_backprop_input", "tf.nn.ctc_beam_search_decoder", "tf.nn.moments", diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py index dc03f37a80..98d9cfc381 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2_test.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2_test.py @@ -913,9 +913,8 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map "tf.nn.conv2d_backprop_filter(input, filter_sizes, out_backprop, " "strides, padding, use_cudnn_on_gpu, data_format)") expected_text = ( - "tf.nn.conv2d_backprop_filter(input=input, filter_sizes=filter_sizes, " - "out_backprop=out_backprop, strides=strides, padding=padding, " - "data_format=data_format)") + "tf.compat.v1.nn.conv2d_backprop_filter(input, filter_sizes, " + "out_backprop, strides, padding, use_cudnn_on_gpu, data_format)") _, unused_report, unused_errors, new_text = self._upgrade(text) self.assertEqual(new_text, expected_text) @@ -924,8 +923,8 @@ bazel-bin/tensorflow/tools/compatibility/update/generate_v2_reorders_map "tf.nn.conv2d_backprop_input(input_sizes, filter, out_backprop, " "strides, padding, use_cudnn_on_gpu, data_format)") expected_text = ( - "tf.nn.conv2d_backprop_input(input_sizes=input_sizes, filters=filter, " - "out_backprop=out_backprop, strides=strides, padding=padding, " + "tf.nn.conv2d_transpose(output_shape=input_sizes, filters=filter, " + "input=out_backprop, strides=strides, padding=padding, " "data_format=data_format)") _, unused_report, unused_errors, new_text = self._upgrade(text) self.assertEqual(new_text, expected_text) -- GitLab From 5cebc60244d54890620393bc6529f59612262e4b Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Thu, 14 Feb 2019 
06:20:19 -0800 Subject: [PATCH 132/351] Ensure custom metaclasses are supported with tf.Module. PiperOrigin-RevId: 233942910 --- tensorflow/python/module/module.py | 2 +- tensorflow/python/module/module_test.py | 38 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/module/module.py b/tensorflow/python/module/module.py index 5fe7644ee3..53b410985d 100644 --- a/tensorflow/python/module/module.py +++ b/tensorflow/python/module/module.py @@ -55,7 +55,7 @@ class ModuleMetaclass(type): value.fdel if not value.fdel else with_name_scope(value.fdel), doc=value.__doc__) - return type.__new__(mcs, name, bases, clsdict) + return super(ModuleMetaclass, mcs).__new__(mcs, name, bases, clsdict) def __call__(cls, *args, **kwargs): # Call new such that we have an un-initialized module instance that we can diff --git a/tensorflow/python/module/module_test.py b/tensorflow/python/module/module_test.py index 21cc6c569f..62d9fe1237 100644 --- a/tensorflow/python/module/module_test.py +++ b/tensorflow/python/module/module_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import collections from absl.testing import parameterized +import six from tensorflow.python.compat import v2_compat from tensorflow.python.eager import def_function @@ -415,6 +416,43 @@ class SimpleModule(module.Module): IS_MEMBER = lambda v: isinstance(v, MemberType) IS_MODULE = lambda v: isinstance(v, module.Module) + +class CustomMetaclass(type): + + TAG = "__custom_metaclass__" + + def __new__(mcs, name, bases, clsdict): + new_type = super(CustomMetaclass, mcs).__new__(mcs, name, bases, clsdict) + setattr(new_type, CustomMetaclass.TAG, True) + return new_type + + +class CombiningMetaclass(module.ModuleMetaclass, CustomMetaclass): + + TAG = "__combining_metaclass__" + + def __new__(mcs, name, bases, clsdict): + new_type = super(CombiningMetaclass, mcs).__new__(mcs, name, bases, clsdict) + setattr(new_type, CombiningMetaclass.TAG, True) + return 
new_type + + +@six.add_metaclass(CombiningMetaclass) +class ModuleWithCustomMetaclass(module.Module): + + def __init__(self): + super(ModuleWithCustomMetaclass, self).__init__() + self.init_name_scope = get_name_scope() + + +class CustomMetaclassTest(test.TestCase): + + def testSupportsCustomMetaclass(self): + m = ModuleWithCustomMetaclass() + self.assertEqual(m.init_name_scope, "module_with_custom_metaclass/") + self.assertTrue(getattr(ModuleWithCustomMetaclass, CombiningMetaclass.TAG)) + self.assertTrue(getattr(ModuleWithCustomMetaclass, CustomMetaclass.TAG)) + if __name__ == "__main__": v2_compat.enable_v2_behavior() test.main() -- GitLab From 1597599e7e7846dda3134b5f3a14cfb40669389a Mon Sep 17 00:00:00 2001 From: Chris Jones Date: Thu, 14 Feb 2019 06:53:25 -0800 Subject: [PATCH 133/351] Support XLA cross-replica sum on integer types from TensorFlow. PiperOrigin-RevId: 233946689 --- tensorflow/core/ops/tpu_cross_replica_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/tpu_cross_replica_ops.cc b/tensorflow/core/ops/tpu_cross_replica_ops.cc index 34fa5bd549..f2ffb3acd1 100644 --- a/tensorflow/core/ops/tpu_cross_replica_ops.cc +++ b/tensorflow/core/ops/tpu_cross_replica_ops.cc @@ -76,7 +76,7 @@ REGISTER_OP("CrossReplicaSum") .Input("input: T") .Input("group_assignment: int32") .Output("output: T") - .Attr("T: {bfloat16, float}") + .Attr("T: {bfloat16, float, int32, uint32}") .SetShapeFn(shape_inference::UnchangedShape); REGISTER_OP("CollectivePermute") -- GitLab From 74e49b2b1771e01e697b0fffe6d0b1531784322e Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 14 Feb 2019 07:40:07 -0800 Subject: [PATCH 134/351] Automated rollback of commit 269ec76458f44da3407f272adaa62c5e0d32403b PiperOrigin-RevId: 233953070 --- tensorflow/compiler/tf2xla/kernels/binary_ops.cc | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc 
b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc index 66446106d3..ad6b334326 100644 --- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc @@ -79,10 +79,7 @@ static xla::XlaOp DivNoNanImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, XLA_MAKE_BINARY(DivNoNan, DivNoNanImpl(b, input_type(0), lhs, rhs, broadcast_helper)); -// Implementation of FloorDiv. -// -// For floating-point values, simply returns floor(x / y). For integers, does: -// +// Implementation of FloorDiv. Pseudo-code: // if ((x < 0) != (y < 0)) { // T abs_x = std::abs(x); // T abs_y = std::abs(y); @@ -93,9 +90,6 @@ XLA_MAKE_BINARY(DivNoNan, static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, xla::XlaOp y, const BCast& broadcast_helper) { std::tie(x, y) = XlaBinaryOp::Broadcast(x, y, broadcast_helper); - if (DataTypeIsFloating(dtype)) { - return xla::Floor(xla::Div(x, y)); - } if (DataTypeIsUnsigned(dtype)) { return xla::Div(x, y); } @@ -105,7 +99,11 @@ static xla::XlaOp FloorDivImpl(xla::XlaBuilder* b, DataType dtype, xla::XlaOp x, auto abs_x = xla::Abs(x); auto abs_y = xla::Abs(y); auto t = xla::Neg(xla::Sub(xla::Add(abs_x, abs_y), one)); - return xla::Select(different_sign, xla::Div(t, abs_y), xla::Div(x, y)); + auto result = xla::Select(different_sign, xla::Div(t, abs_y), xla::Div(x, y)); + if (DataTypeIsFloating(dtype)) { + result = xla::Floor(result); + } + return result; } XLA_MAKE_BINARY(FloorDiv, FloorDivImpl(b, input_type(0), lhs, rhs, broadcast_helper)); -- GitLab From f54635e27fb2789df1e6b29f92d41afb3d974814 Mon Sep 17 00:00:00 2001 From: Chris Jones Date: Thu, 14 Feb 2019 08:10:58 -0800 Subject: [PATCH 135/351] Support all numeric type in XLA all-to-all from TensorFlow. 
PiperOrigin-RevId: 233958133 --- tensorflow/core/ops/tpu_cross_replica_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/ops/tpu_cross_replica_ops.cc b/tensorflow/core/ops/tpu_cross_replica_ops.cc index f2ffb3acd1..c26b49eb34 100644 --- a/tensorflow/core/ops/tpu_cross_replica_ops.cc +++ b/tensorflow/core/ops/tpu_cross_replica_ops.cc @@ -26,7 +26,7 @@ REGISTER_OP("AllToAll") .Input("input: T") .Input("group_assignment: int32") .Output("output: T") - .Attr("T: {bfloat16, float}") + .Attr("T: {numbertype, bool}") .Attr("concat_dimension: int") .Attr("split_dimension: int") .Attr("split_count: int") -- GitLab From 93b35c267fcb1ed5fd58fb88e5fc686fc4462f20 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 08:18:58 -0800 Subject: [PATCH 136/351] Update ops-related pbtxt files. PiperOrigin-RevId: 233959297 --- .../core/ops/compat/ops_history.v1.pbtxt | 80 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 20 ++++- 2 files changed, 99 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 054d96e2f1..c88b14ea10 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -1583,6 +1583,59 @@ op { type: "int" } } +op { + name: "AllToAll" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "group_assignment" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + type: DT_BOOL + } + } + } + attr { + name: "concat_dimension" + type: "int" + } + attr { + name: 
"split_dimension" + type: "int" + } + attr { + name: "split_count" + type: "int" + } +} op { name: "Angle" input_arg { @@ -16543,6 +16596,33 @@ op { } } } +op { + name: "CrossReplicaSum" + input_arg { + name: "input" + type_attr: "T" + } + input_arg { + name: "group_assignment" + type: DT_INT32 + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "T" + type: "type" + allowed_values { + list { + type: DT_BFLOAT16 + type: DT_FLOAT + type: DT_INT32 + type: DT_UINT32 + } + } + } +} op { name: "CudnnRNN" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 20cc2596d9..75002e197a 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -670,8 +670,24 @@ op { type: "type" allowed_values { list { - type: DT_BFLOAT16 type: DT_FLOAT + type: DT_DOUBLE + type: DT_INT32 + type: DT_UINT8 + type: DT_INT16 + type: DT_INT8 + type: DT_COMPLEX64 + type: DT_INT64 + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT32 + type: DT_BFLOAT16 + type: DT_UINT16 + type: DT_COMPLEX128 + type: DT_HALF + type: DT_UINT32 + type: DT_UINT64 + type: DT_BOOL } } } @@ -7139,6 +7155,8 @@ op { list { type: DT_BFLOAT16 type: DT_FLOAT + type: DT_INT32 + type: DT_UINT32 } } } -- GitLab From 72c6be901e50b8402ad8f3f08c76910b63aa8ee9 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 14 Feb 2019 08:42:48 -0800 Subject: [PATCH 137/351] [XLA:Python] Refactor XLA client to support multiple platforms in the same process. Use device ordinals when naming devices for execution, buffer placement, infeed, and outfeed. A replica is a property of a computation, and computations may have different numbers of replicas. Different numbers of replicas may be mapped onto a particular topology of devices differently, so a replica number does not uniquely name a device. 
PiperOrigin-RevId: 233962949 --- .../compiler/xla/executable_run_options.cc | 2 +- .../compiler/xla/executable_run_options.h | 4 +- tensorflow/compiler/xla/python/BUILD | 1 + .../xla/python/local_computation_builder.cc | 309 +++++++----------- .../xla/python/local_computation_builder.h | 87 ++--- .../xla/python/local_computation_builder.i | 38 ++- tensorflow/compiler/xla/python/xla_client.py | 178 ++++++---- 7 files changed, 317 insertions(+), 302 deletions(-) diff --git a/tensorflow/compiler/xla/executable_run_options.cc b/tensorflow/compiler/xla/executable_run_options.cc index 0f9b591c70..230f3b202a 100644 --- a/tensorflow/compiler/xla/executable_run_options.cc +++ b/tensorflow/compiler/xla/executable_run_options.cc @@ -77,7 +77,7 @@ ExecutionProfile* ExecutableRunOptions::execution_profile() const { } ExecutableRunOptions& ExecutableRunOptions::set_device_assignment( - DeviceAssignment* device_assignment) { + const DeviceAssignment* device_assignment) { device_assignment_ = device_assignment; return *this; } diff --git a/tensorflow/compiler/xla/executable_run_options.h b/tensorflow/compiler/xla/executable_run_options.h index 6f36d11dfb..1e744953bd 100644 --- a/tensorflow/compiler/xla/executable_run_options.h +++ b/tensorflow/compiler/xla/executable_run_options.h @@ -74,7 +74,7 @@ class ExecutableRunOptions { ExecutableRunOptions& set_execution_profile(ExecutionProfile* profile); ExecutableRunOptions& set_device_assignment( - DeviceAssignment* device_assignment); + const DeviceAssignment* device_assignment); const DeviceAssignment* device_assignment() const; ExecutableRunOptions& set_rng_seed(int rng_seed); @@ -83,7 +83,7 @@ class ExecutableRunOptions { private: DeviceMemoryAllocator* allocator_ = nullptr; int device_ordinal_ = -1; - DeviceAssignment* device_assignment_ = nullptr; + const DeviceAssignment* device_assignment_ = nullptr; stream_executor::Stream* stream_ = nullptr; const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr; ExecutionProfile* 
execution_profile_ = nullptr; diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD index f7e2d26b7a..a0687e0d52 100644 --- a/tensorflow/compiler/xla/python/BUILD +++ b/tensorflow/compiler/xla/python/BUILD @@ -77,6 +77,7 @@ cc_library( "//tensorflow/compiler/xla/client/lib:cholesky", "//tensorflow/compiler/xla/client/lib:math", "//tensorflow/compiler/xla/client/lib:qr", + "//tensorflow/compiler/xla/service:computation_placer", "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xla/service:shaped_buffer", "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index 77bf51cb45..671953aefe 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/compiler/xla/executable_run_options.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/computation_placer.h" #include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" #include "tensorflow/compiler/xla/service/platform_util.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -53,74 +54,6 @@ namespace swig { // TODO(b/118641336): Factor out XRT parts into a small c++ library of their // own. -// TODO(b/34473877) Ideally XLA would support AllReduce among arbitrary sets of -// device handles instead of needing to set the number of replicas at XLA -// service initialization time. 
-tensorflow::mutex g_local_client_mutex(tensorflow::LINKER_INITIALIZED); -int g_replica_count GUARDED_BY(g_local_client_mutex) = 1; -LocalClient* g_local_client GUARDED_BY(g_local_client_mutex) = nullptr; - -string* GetPlatformNameString() { - static string* platform_name_string PT_GUARDED_BY(g_local_client_mutex) = - new string("Host"); - return platform_name_string; -} - -Status InitializeReplicaCount(int replica_count) { - if (replica_count < 1) { - return InvalidArgument("Replica count must be >= 1; got %d.", - replica_count); - } - tensorflow::mutex_lock lock(g_local_client_mutex); - if (g_local_client != nullptr) { - return FailedPrecondition( - "Attempted to set the replica count to %d, but a local XLA service was " - "previously created with a replica count of %d.", - replica_count, g_replica_count); - } - g_replica_count = replica_count; - return Status::OK(); -} - -Status InitializePlatformName(const string& platform_name) { - string* g_platform_name = GetPlatformNameString(); - tensorflow::mutex_lock lock(g_local_client_mutex); - if (g_local_client != nullptr) { - return FailedPrecondition( - "Attempted to set the platform name to %s, but a local XLA service was " - "previously created with a platform name of %s.", - platform_name, *g_platform_name); - } - TF_ASSIGN_OR_RETURN(se::Platform * platform, - PlatformUtil::GetPlatform(platform_name)); - if (platform->VisibleDeviceCount() <= 0) { - return InvalidArgument("Platform %s has no visible devices.", - platform_name); - } - *g_platform_name = platform_name; - return Status::OK(); -} - -int GetReplicaCount() { - tensorflow::mutex_lock lock(g_local_client_mutex); - return g_replica_count; -} - -StatusOr GetOrCreateLocalClient() { - string* platform_name = GetPlatformNameString(); - tensorflow::mutex_lock lock(g_local_client_mutex); - if (g_local_client != nullptr) { - return g_local_client; - } - LocalClientOptions options; - options.set_platform(PlatformUtil::GetPlatform(*platform_name).ValueOrDie()); - 
options.set_number_of_replicas(g_replica_count); - TF_ASSIGN_OR_RETURN(g_local_client, - ClientLibrary::GetOrCreateLocalClient(options)); - CHECK(g_local_client != nullptr); - return g_local_client; -} - Status RegisterCpuCustomCallTarget(const string& fn_name, PyObject* capsule) { const char* name = "xla._CPU_CUSTOM_CALL_TARGET"; if (!PyCapsule_IsValid(capsule, name)) { @@ -135,62 +68,66 @@ Status RegisterCpuCustomCallTarget(const string& fn_name, PyObject* capsule) { return Status::OK(); } -Status TransferToInfeedLocal(const Literal& literal) { - VLOG(1) << "Infeeding literal without replica number; shape: " - << literal.shape(); - TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient()); - return client->TransferToInfeedLocal(literal, /*device_ordinal=*/0); -} +LocalClient::LocalClient(xla::LocalClient* client) : client_(client) {} -Status TransferToInfeedLocalReplica(const Literal& literal, - int replica_number) { - VLOG(1) << "Infeeding shape " << literal.shape() - << " to replica number: " << replica_number; - TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient()); - TF_ASSIGN_OR_RETURN(int device_ordinal, - client->ReplicaNumberToDeviceOrdinal(replica_number)); - return client->TransferToInfeedLocal(literal, device_ordinal); +/* static */ StatusOr LocalClient::Get( + const string& platform_name) { + TF_ASSIGN_OR_RETURN(se::Platform * platform, + PlatformUtil::GetPlatform(platform_name)); + if (platform->VisibleDeviceCount() <= 0) { + return InvalidArgument("Platform %s has no visible devices.", + platform_name); + } + LocalClientOptions options; + options.set_platform(platform); + TF_ASSIGN_OR_RETURN(xla::LocalClient * client, + ClientLibrary::GetOrCreateLocalClient(options)); + CHECK(client != nullptr); + return LocalClient(client); } -StatusOr TransferFromOutfeedLocalReplica(const Shape& shape, - int replica_number) { - VLOG(1) << "Outfeeding literal from replica number: " << replica_number - << " shape: " << shape; - 
TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient()); - TF_ASSIGN_OR_RETURN(int device_ordinal, - client->ReplicaNumberToDeviceOrdinal(replica_number)); - return client->TransferFromOutfeedLocal(shape, device_ordinal); +// Returns the number of devices known to the XLA client. +int LocalClient::DeviceCount() const { return client_->device_count(); } + +Status LocalClient::TransferToInfeed(const Literal& literal, + int device_ordinal) { + VLOG(1) << "Infeeding literal to device " << device_ordinal + << "; shape: " << literal.shape(); + return client_->TransferToInfeed(literal, device_ordinal); } -static StatusOr ToBuffer(LocalClient* client, - int device_ordinal, - const Literal& arg) { - return client->LiteralToShapedBuffer(arg, device_ordinal, - client->backend().memory_allocator()); +StatusOr LocalClient::TransferFromOutfeed(const Shape& shape, + int device_ordinal) { + VLOG(1) << "Outfeeding literal from device " << device_ordinal + << "; shape: " << shape; + return client_->TransferFromOutfeed(&shape, device_ordinal); } /* static */ StatusOr LocalShapedBuffer::FromLiteral( const Literal& argument, const absl::optional& shape_with_layout, - int replica_number) { - TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient()); - TF_ASSIGN_OR_RETURN(int device_ordinal, - client->ReplicaNumberToDeviceOrdinal(replica_number)); - VLOG(1) << "Creating shaped buffer from literal on replica/ordinal: " - << replica_number << "/" << device_ordinal; + const LocalClient& client, int device_ordinal) { + VLOG(1) << "Creating shaped buffer from literal on device ordinal: " + << device_ordinal; + auto literal_to_buffer = [&](const Literal& arg) { + return client.client()->LiteralToShapedBuffer( + arg, device_ordinal, client.client()->backend().memory_allocator()); + }; + StatusOr buf = [&] { if (shape_with_layout) { Literal relaid = argument.Relayout(shape_with_layout.value()); - return ToBuffer(client, device_ordinal, relaid); + return 
literal_to_buffer(relaid); } - return ToBuffer(client, device_ordinal, argument); + return literal_to_buffer(argument); }(); TF_RETURN_IF_ERROR(buf.status()); - return new LocalShapedBuffer(std::move(buf).ValueOrDie()); + return new LocalShapedBuffer(std::move(buf).ValueOrDie(), client.client()); } -LocalShapedBuffer::LocalShapedBuffer(ScopedShapedBuffer shaped_buffer) - : shaped_buffer_(std::move(shaped_buffer)) {} +LocalShapedBuffer::LocalShapedBuffer(ScopedShapedBuffer shaped_buffer, + xla::LocalClient* client) + : shaped_buffer_(std::move(shaped_buffer)), client_(client) {} const ScopedShapedBuffer* LocalShapedBuffer::shaped_buffer() const { return &shaped_buffer_; @@ -203,8 +140,7 @@ const Shape& LocalShapedBuffer::shape() const { } StatusOr LocalShapedBuffer::ToLiteral() const { - TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient()); - return client->ShapedBufferToLiteral(*shaped_buffer()); + return client_->ShapedBufferToLiteral(*shaped_buffer()); } LocalShapedBufferTuple::LocalShapedBufferTuple( @@ -235,6 +171,51 @@ StatusOr LocalShapedBufferTuple::Release(int i) { int64 LocalShapedBufferTuple::size() const { return elements_.size(); } +StatusOr LocalShapedBuffer::DestructureTuple() { + const Shape tuple_shape = shape(); + + if (!tuple_shape.IsTuple()) { + return InvalidArgument( + "Attemped to destructure a LocalShapedBuffer that did not have a tuple " + "shape; shape: %s", + ShapeUtil::HumanString(tuple_shape)); + } + + DeviceMemoryAllocator* allocator = shaped_buffer()->memory_allocator(); + ShapedBuffer tuple_buffer = Release(); + + // Extract some metadata we use to construct scoped buffers. + const se::Platform* platform = tuple_buffer.platform(); + int device_ordinal = tuple_buffer.device_ordinal(); + + ShapeTree& shape_tree = tuple_buffer.buffers(); + std::vector results; + for (int64 i = 0; i < ShapeUtil::TupleElementCount(tuple_shape); ++i) { + // Create a shaped buffer for this destructured tuple element. 
+ const Shape& subshape = ShapeUtil::GetSubshape(tuple_shape, {i}); + VLOG(3) << "Starting tuple element " << i << " subshape: " << subshape; + ShapedBuffer shaped_buffer(subshape, subshape, platform, device_ordinal); + + ShapeUtil::ForEachSubshape( + subshape, [&](const Shape& s, const ShapeIndex& index) { + ShapeIndex original(index); + original.push_front(i); + se::DeviceMemoryBase* device_memory = + shape_tree.mutable_element(original); + shaped_buffer.set_buffer(*device_memory, index); + *device_memory = se::DeviceMemoryBase(); + }); + + VLOG(3) << "Completed tuple element: " << i; + results.push_back(new LocalShapedBuffer( + ScopedShapedBuffer(std::move(shaped_buffer), allocator), client_)); + } + // Deallocate the root buffer. + se::DeviceMemoryBase root_buffer = tuple_buffer.root_buffer(); + TF_RETURN_IF_ERROR(allocator->Deallocate(device_ordinal, root_buffer)); + return new LocalShapedBufferTuple(std::move(results)); +} + XrtAllocation::XrtAllocation(int64 handle, Shape shape, const string& session_target) : handle_(handle), shape_(shape), session_target_(session_target) {} @@ -333,8 +314,21 @@ StatusOr XrtAllocationTuple::Release(int i) { int64 XrtAllocationTuple::size() const { return elements_.size(); } LocalExecutable::LocalExecutable( - std::unique_ptr executable) - : executable_(std::move(executable)) {} + std::unique_ptr executable, + xla::DeviceAssignment device_assignment, xla::LocalClient* client) + : executable_(std::move(executable)), + device_assignment_(std::move(device_assignment)), + client_(client) {} + +std::vector LocalExecutable::DeviceOrdinals() const { + int num_replicas = device_assignment_.replica_count(); + std::vector device_ordinals; + device_ordinals.reserve(num_replicas); + for (int i = 0; i < num_replicas; ++i) { + device_ordinals.push_back(device_assignment_(i, 0)); + } + return device_ordinals; +} StatusOr LocalExecutable::Execute( absl::Span argument_handles) { @@ -343,12 +337,8 @@ StatusOr LocalExecutable::Execute( 
"Attempted to execute computation with %d replicas using Execute()", num_replicas()); } - TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient()); - TF_ASSIGN_OR_RETURN(DeviceAssignment device_assignment, - client->backend().computation_placer()->AssignDevices( - 1, /*computation_count=*/1)); StatusOr result_buffer_status; - const int device_ordinal = device_assignment(0, 0); + const int device_ordinal = device_assignment_(0, 0); VLOG(3) << "Replica 0 mapped to device ordinal for execution: " << device_ordinal; @@ -360,10 +350,10 @@ StatusOr LocalExecutable::Execute( ExecutableRunOptions options; options.set_device_ordinal(device_ordinal); - options.set_allocator(client->backend().memory_allocator()); + options.set_allocator(client_->backend().memory_allocator()); options.set_intra_op_thread_pool( - client->backend().eigen_intra_op_thread_pool_device()); - options.set_device_assignment(&device_assignment); + client_->backend().eigen_intra_op_thread_pool_device()); + options.set_device_assignment(&device_assignment_); result_buffer_status = executable_->Run(argument_buffers, options); @@ -373,13 +363,13 @@ StatusOr LocalExecutable::Execute( "%s.", result_buffer_status.status().ToString()); } - return new LocalShapedBuffer(std::move(result_buffer_status).ValueOrDie()); + return new LocalShapedBuffer(std::move(result_buffer_status).ValueOrDie(), + client_); } StatusOr LocalExecutable::ExecutePerReplica( absl::Span> argument_handles) { - TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient()); - const int num_devices = client->device_count(); + const int num_devices = client_->device_count(); if (argument_handles.size() != num_replicas()) { return InvalidArgument( @@ -394,14 +384,9 @@ StatusOr LocalExecutable::ExecutePerReplica( VLOG(1) << "Executing with " << num_replicas() << " replicas."; - TF_ASSIGN_OR_RETURN(DeviceAssignment device_assignment, - client->backend().computation_placer()->AssignDevices( - num_replicas(), 
/*computation_count=*/1)); - std::vector> results(num_replicas()); - auto execute = [this, client, &device_assignment, &argument_handles, - &results](int replica) { - const int device_ordinal = device_assignment(replica, 0); + auto execute = [this, &argument_handles, &results](int replica) { + const int device_ordinal = device_assignment_(replica, 0); VLOG(3) << "Replica " << replica << " mapped to device ordinal for execution: " << device_ordinal; @@ -413,10 +398,10 @@ StatusOr LocalExecutable::ExecutePerReplica( ExecutableRunOptions options; options.set_device_ordinal(device_ordinal); - options.set_allocator(client->backend().memory_allocator()); + options.set_allocator(client_->backend().memory_allocator()); options.set_intra_op_thread_pool( - client->backend().eigen_intra_op_thread_pool_device()); - options.set_device_assignment(&device_assignment); + client_->backend().eigen_intra_op_thread_pool_device()); + options.set_device_assignment(&device_assignment_); StatusOr result_buffer_status = executable_->Run(argument_buffers, options); @@ -448,7 +433,7 @@ StatusOr LocalExecutable::ExecutePerReplica( replica, statusor.status().ToString()); } wrapped_results[replica] = - new LocalShapedBuffer(std::move(statusor).ValueOrDie()); + new LocalShapedBuffer(std::move(statusor).ValueOrDie(), client_); } return new LocalShapedBufferTuple(std::move(wrapped_results)); @@ -532,22 +517,27 @@ Computation::Computation(XlaComputation computation) StatusOr Computation::Compile( const std::vector& argument_shapes, - const ExecutableBuildOptions* build_options) { + const ExecutableBuildOptions* build_options, const LocalClient& client) { std::vector argument_shape_pointers; argument_shape_pointers.reserve(argument_shapes.size()); for (auto& argument_shape : argument_shapes) { argument_shape_pointers.push_back(&argument_shape); } - TF_ASSIGN_OR_RETURN(LocalClient * client, GetOrCreateLocalClient()); ExecutableBuildOptions options; if (build_options != nullptr) { options = 
*build_options; } TF_ASSIGN_OR_RETURN( auto local_executable, - client->Compile(computation_, argument_shape_pointers, options)); - return new LocalExecutable(std::move(local_executable)); + client.client()->Compile(computation_, argument_shape_pointers, options)); + TF_ASSIGN_OR_RETURN( + DeviceAssignment device_assignment, + client.client()->backend().computation_placer()->AssignDevices( + options.num_replicas(), /*computation_count=*/1)); + + return new LocalExecutable(std::move(local_executable), + std::move(device_assignment), client.client()); } StatusOr Computation::CompileForXrt( @@ -1047,53 +1037,6 @@ void DeleteXrtExecutable(XrtExecutable* computation) { delete computation; } void DeleteComputation(Computation* computation) { delete computation; } -StatusOr DestructureLocalShapedBufferTuple( - LocalShapedBuffer* local_shaped_buffer) { - const Shape tuple_shape = local_shaped_buffer->shape(); - - if (!tuple_shape.IsTuple()) { - return InvalidArgument( - "Attemped to destructure a LocalShapedBuffer that did not have a tuple " - "shape; shape: %s", - ShapeUtil::HumanString(tuple_shape)); - } - - DeviceMemoryAllocator* allocator = - local_shaped_buffer->shaped_buffer()->memory_allocator(); - ShapedBuffer tuple_buffer = local_shaped_buffer->Release(); - - // Extract some metadata we use to construct scoped buffers. - const se::Platform* platform = tuple_buffer.platform(); - int device_ordinal = tuple_buffer.device_ordinal(); - - ShapeTree& shape_tree = tuple_buffer.buffers(); - std::vector results; - for (int64 i = 0; i < ShapeUtil::TupleElementCount(tuple_shape); ++i) { - // Create a shaped buffer for this destructured tuple element. 
- const Shape& subshape = ShapeUtil::GetSubshape(tuple_shape, {i}); - VLOG(3) << "Starting tuple element " << i << " subshape: " << subshape; - ShapedBuffer shaped_buffer(subshape, subshape, platform, device_ordinal); - - ShapeUtil::ForEachSubshape( - subshape, [&](const Shape& s, const ShapeIndex& index) { - ShapeIndex original(index); - original.push_front(i); - se::DeviceMemoryBase* device_memory = - shape_tree.mutable_element(original); - shaped_buffer.set_buffer(*device_memory, index); - *device_memory = se::DeviceMemoryBase(); - }); - - VLOG(3) << "Completed tuple element: " << i; - results.push_back(new LocalShapedBuffer( - ScopedShapedBuffer(std::move(shaped_buffer), allocator))); - } - // Deallocate the root buffer. - se::DeviceMemoryBase root_buffer = tuple_buffer.root_buffer(); - TF_RETURN_IF_ERROR(allocator->Deallocate(device_ordinal, root_buffer)); - return new LocalShapedBufferTuple(std::move(results)); -} - StatusOr DestructureXrtAllocationTuple( XrtAllocation* allocation, const string& session_target) { const Shape& tuple_shape = allocation->shape(); diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h index c9e93fb5aa..9ff46d57dc 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.h +++ b/tensorflow/compiler/xla/python/local_computation_builder.h @@ -35,42 +35,42 @@ limitations under the License. namespace xla { namespace swig { -// Initializes the number of replicas that XLA will be initialized with (when -// first obtaining a handle to the local XLA service). If this is called after -// the handle to the local XLA service has been established, then an error is -// returned. -Status InitializeReplicaCount(int replica_count); - -// Initializes the platform name that XLA will be initialized with (when -// first obtaining a handle to the local XLA service). 
If this is called after -// the handle to the local XLA service has been established, then an error is -// returned. -Status InitializePlatformName(const string& platform_name); - -// Returns the replica count that is currently set, regardless of whether the -// local XLA service has been instantiated yet or not. -int GetReplicaCount(); - // Registers a 'fn_capsule' as a CPU custom call target. // 'fn_capsule' is a void* pointer encapsulated in a PyCapsule object, with name // "xla._CPU_CUSTOM_CALL_TARGET". Status RegisterCpuCustomCallTarget(const string& name, PyObject* fn_capsule); -// Wraps the local client's infeed-transfer function. -// -// The default device ordinal (0) is used. -Status TransferToInfeedLocal(const Literal& literal); +// Wrapper around an xla::LocalClient. +class LocalClient { + public: + // Initializes a local XLA client for `platform_name`. Returns an error if no + /// such platform exists, or if the platform has no visible devices. + static StatusOr Get(const string& platform_name); -// Transfers the given literal to the infeed of the given replica. -// -// The replica number is resolved to an appropriate device ordinal. -Status TransferToInfeedLocalReplica(const Literal& literal, int replica_number); + // Copyable and moveable; the class is just a wrapper around a + // xla::LocalClient pointer for convenient SWIG wrapping. -// Transfers a literal of the given shape from the outfeed of the given replica. -// -// The replica number is resolved to an appropriate device ordinal. -StatusOr TransferFromOutfeedLocalReplica(const Shape& shape, - int replica_number); + // Returns the number of devices known to the XLA client. + int DeviceCount() const; + + // Wraps the local client's infeed-transfer function. + // + // The default device ordinal (0) is used. + Status TransferToInfeed(const Literal& literal, int device_ordinal); + + // Transfers a literal of the given shape from the outfeed of the given + // replica. 
+ StatusOr TransferFromOutfeed(const Shape& shape, int device_ordinal); + + xla::LocalClient* client() const { return client_; } + + private: + LocalClient(xla::LocalClient* client); + + xla::LocalClient* client_; +}; + +class LocalShapedBufferTuple; // Represents a reference to literals that live in a device-allocated buffer via // XLA. Specifically, wraps a ScopedShapedBuffer produced by transferring a @@ -79,9 +79,9 @@ class LocalShapedBuffer { public: static StatusOr FromLiteral( const Literal& argument, const absl::optional& shape_with_layout, - int replica_number); + const LocalClient& client, int device_ordinal); - LocalShapedBuffer(ScopedShapedBuffer shaped_buffer); + LocalShapedBuffer(ScopedShapedBuffer shaped_buffer, xla::LocalClient* client); StatusOr ToLiteral() const; const Shape& shape() const; const ScopedShapedBuffer* shaped_buffer() const; @@ -90,8 +90,13 @@ class LocalShapedBuffer { // analogous to std::unique_ptr::release(). ShapedBuffer Release(); + // Destructures a tuple-valued LocalShapedBuffer into its constitutent + // elements in LocalShapedBufferTuple form. + StatusOr DestructureTuple(); + private: ScopedShapedBuffer shaped_buffer_; + xla::LocalClient* client_; }; // Result of a tuple destructuring operation on a LocalShapedBuffer -- this @@ -117,11 +122,6 @@ class LocalShapedBufferTuple { std::vector elements_; }; -// Destructures a tuple-valued LocalShapedBuffer into its constitutent elements -// in LocalShapedBufferTuple form. -StatusOr DestructureLocalShapedBufferTuple( - LocalShapedBuffer* local_shaped_buffer); - // Represents a reference to literals that live in a device-allocated buffer via // XRT. Specifically, wraps an int64 handle produced by running the allocation // graph, and an XLA shape to track the referent's shape. @@ -178,12 +178,17 @@ StatusOr DestructureXrtAllocationTuple( // device-allocated literals. Specifically, wraps an XLA LocalExecutable. 
class LocalExecutable { public: - LocalExecutable(std::unique_ptr executable); + LocalExecutable(std::unique_ptr executable, + xla::DeviceAssignment device_assignment, + xla::LocalClient* client); int num_replicas() const { return executable_->build_options().num_replicas(); } + // Returns the device ordinals to which each replica is assigned. + std::vector DeviceOrdinals() const; + StatusOr Execute( absl::Span argument_handles); @@ -194,7 +199,9 @@ class LocalExecutable { absl::Span > argument_handles); private: - std::unique_ptr executable_; + const std::unique_ptr executable_; + const xla::DeviceAssignment device_assignment_; + xla::LocalClient* const client_; }; // Represents a compiled computation that can be executed given handles to @@ -207,6 +214,8 @@ class XrtExecutable { const string& session_target); ~XrtExecutable(); + std::vector DeviceOrdinals() const { return {0}; } + StatusOr Execute( absl::Span argument_handles); @@ -229,7 +238,7 @@ class Computation { StatusOr Compile( const std::vector& argument_shapes, - const ExecutableBuildOptions* build_options); + const ExecutableBuildOptions* build_options, const LocalClient& client); // Accepts a `session_target` argument, used in constructing the // `tensorflow::ClientSession` instance in which the compilation graph is run. diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index e47227da61..5327ce91db 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -23,6 +23,7 @@ limitations under the License. 
// C++ Python // -------------------------------------+--------------------------------------- // Span <- sequence of int +// vector -> sequence of int // Span <- sequence of LocalOp // Literal <-> (nested tuple of) numpy ndarray // std::vector <- sequence of (nested tuple of) ndarray @@ -215,6 +216,15 @@ tensorflow::ImportNumpy(); // Basic types + +%typemap(out) std::vector { + PyObject* out = PyList_New($1.size()); + for (int i = 0; i < $1.size(); ++i) { + PyList_SET_ITEM(out, i, PyInt_FromLong($1[i])); + } + $result = out; +} + %typemap(out) StatusOr { if ($1.ok()) { $result = PyBool_FromLong($1.ConsumeValueOrDie()); @@ -288,6 +298,19 @@ tensorflow::ImportNumpy(); // Computation and buffer/allocation types +%typemap(out) StatusOr { + if ($1.ok()) { + xla::swig::LocalClient value = $1.ValueOrDie(); + { + auto $1 = value; + $typemap(out, xla::swig::LocalClient) + } + } else { + PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); + SWIG_fail; + } +} + %typemap(out) StatusOr { if ($1.ok()) { auto* value = $1.ValueOrDie(); @@ -979,17 +1002,17 @@ tensorflow::ImportNumpy(); %ignoreall %unignore xla; %unignore xla::swig; -%unignore xla::swig::InitializeReplicaCount; -%unignore xla::swig::InitializePlatformName; -%unignore xla::swig::GetReplicaCount; %unignore xla::swig::RegisterCpuCustomCallTarget; -%unignore xla::swig::TransferToInfeedLocal; -%unignore xla::swig::TransferToInfeedLocalReplica; -%unignore xla::swig::TransferFromOutfeedLocalReplica; +%unignore xla::swig::LocalClient; +%unignore xla::swig::LocalClient::Get; +%unignore xla::swig::LocalClient::DeviceCount; +%unignore xla::swig::LocalClient::TransferToInfeed; +%unignore xla::swig::LocalClient::TransferFromOutfeed; %unignore xla::swig::LocalShapedBuffer; %unignore xla::swig::LocalShapedBuffer::FromLiteral; %unignore xla::swig::LocalShapedBuffer::ToLiteral; %unignore xla::swig::LocalShapedBuffer::shape; +%unignore xla::swig::LocalShapedBuffer::DestructureTuple; %unignore 
xla::swig::LocalShapedBufferTuple; %unignore xla::swig::LocalShapedBufferTuple::Release; %unignore xla::swig::LocalShapedBufferTuple::size; @@ -1001,9 +1024,11 @@ tensorflow::ImportNumpy(); %unignore xla::swig::XrtAllocationTuple::Release; %unignore xla::swig::XrtAllocationTuple::size; %unignore xla::swig::LocalExecutable; +%unignore xla::swig::LocalExecutable::DeviceOrdinals; %unignore xla::swig::LocalExecutable::Execute; %unignore xla::swig::LocalExecutable::ExecutePerReplica; %unignore xla::swig::XrtExecutable; +%unignore xla::swig::XrtExecutable::DeviceOrdinals; %unignore xla::swig::XrtExecutable::Execute; %unignore xla::swig::Computation; %unignore xla::swig::Computation::Compile; @@ -1128,7 +1153,6 @@ tensorflow::ImportNumpy(); %unignore xla::swig::ComputationBuilder::Gather; %unignore xla::swig::ComputationBuilder::Scatter; %unignore xla::swig::DeleteComputation; -%unignore xla::swig::DestructureLocalShapedBufferTuple; %unignore xla::swig::DestructureXrtAllocationTuple; %unignore xla::swig::DeleteLocalShapedBuffer; %unignore xla::swig::DeleteXrtAllocation; diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index fd65c9dc71..eb109ec157 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -66,6 +66,10 @@ OpMetadata = collections.namedtuple('OpMetadata', _OP_METADATA_FIELDS) class Backend(object): """Abstract base class for XLA backends.""" + @abc.abstractmethod + def device_count(self): + """Returns the number of devices known to the backend.""" + @abc.abstractmethod def buffer_from_pyval(self, pyval, device=0): """Allocates a fresh buffer and populates it with `pyval`.""" @@ -95,21 +99,35 @@ class Backend(object): """Runs an executable in a replicated manner.""" +def _maybe_encode_string(s): + if six.PY3: + return s.encode('utf-8') + else: + return s + + class XlaLocalBackend(Backend): """XLA backend implemented using the in-process 
xla::LocalClient API.""" + def __init__(self, platform=None): + platform = platform or _get_default_platform_name() + self.client = c_api.LocalClient.Get(_maybe_encode_string(platform)) + + def device_count(self): + return self.client.DeviceCount() + def buffer_from_pyval(self, pyval, device=0): - return c_api.LocalShapedBuffer.FromLiteral(pyval, None, device) + return c_api.LocalShapedBuffer.FromLiteral(pyval, None, self.client, device) def delete_buffer(self, c_buffer): c_api.DeleteLocalShapedBuffer(c_buffer) def destructure_tuple(self, c_buffer): - result = c_api.DestructureLocalShapedBufferTuple(c_buffer) + result = c_buffer.DestructureTuple() return [result.Release(i) for i in xrange(result.size())] def compile(self, c_computation, argument_shapes, compile_options): - return c_computation.Compile(argument_shapes, compile_options) + return c_computation.Compile(argument_shapes, compile_options, self.client) def delete_executable(self, executable): assert isinstance(executable, c_api.LocalExecutable) @@ -130,6 +148,9 @@ class XrtBackend(Backend): def __init__(self, target): self.target = target + def device_count(self): + return 1 # Multidevice execution not implemented. + def buffer_from_pyval(self, pyval, device=0): if device != 0: raise NotImplementedError( @@ -163,7 +184,20 @@ class XrtBackend(Backend): return [executable.Execute(per_replica_args[0])] -XLA_LOCAL_BACKEND = XlaLocalBackend() +_default_platform_name = 'Host' +_default_backend = None + + +def _get_default_platform_name(): + return _default_platform_name + + +def _get_default_local_backend(): + global _default_backend + global _default_platform_name + if _default_backend is None: + _default_backend = XlaLocalBackend(_default_platform_name) + return _default_backend class BackendType(enum.Enum): @@ -174,7 +208,7 @@ class BackendType(enum.Enum): def BackendSpec(backend, target): """Compatibility wrapper to support older clients. 
Do not use in new code.""" if backend == BackendType.XLA_LOCAL: - return XLA_LOCAL_BACKEND + return _get_default_local_backend() elif backend == BackendType.XRT: return XrtBackend(target) else: @@ -201,13 +235,6 @@ def CurrentSourceInfoMetadata(op_type=None, op_name=None, skip_frames=1): source_line=lineno) -def _maybe_encode_string(s): - if six.PY3: - return s.encode('utf-8') - else: - return s - - class PaddingType(enum.Enum): VALID = 1 SAME = 2 @@ -346,22 +373,18 @@ class LocalBuffer(object): means the referent is in device memory. """ - def __init__(self, c_buffer, backend, replica): + def __init__(self, c_buffer, backend, device): self.c_buffer = c_buffer self._backend = backend - self._replica = replica + self._device = device @staticmethod - def from_pyval(pyval, replica=0, backend=XLA_LOCAL_BACKEND): + def from_pyval(pyval, device=0, backend=None): """Allocate and copy to XLA the given python value.""" + backend = backend or _get_default_local_backend() pyval = require_numpy_array_layout(pyval) - num_replicas = get_replica_count() - if not 0 <= replica < num_replicas: - raise ValueError( - 'Attempt to place buffer on replica {} when the replica count is {}' - .format(replica, num_replicas)) - cbuf = backend.buffer_from_pyval(pyval, replica) - return LocalBuffer(cbuf, backend, replica) + cbuf = backend.buffer_from_pyval(pyval, device) + return LocalBuffer(cbuf, backend, device) def to_py(self): return self.c_buffer.ToLiteral() @@ -369,8 +392,8 @@ class LocalBuffer(object): def shape(self): return _wrap_shape(self.c_buffer.shape()) - def replica(self): - return self._replica + def device(self): + return self._device def delete(self): if self.c_buffer is not None: @@ -383,7 +406,7 @@ class LocalBuffer(object): result = self._backend.destructure_tuple(self.c_buffer) self.delete() return tuple( - LocalBuffer(sub_buffer, replica=self._replica, backend=self._backend) + LocalBuffer(sub_buffer, device=self._device, backend=self._backend) for sub_buffer in result) 
def is_deleted(self): @@ -595,7 +618,7 @@ class CompileOptions(object): self.num_replicas = get_replica_count() -def transfer_to_infeed(value, replica_number=None): +def transfer_to_infeed(value, device_ordinal=0): """Transfers the given value into the XLA infeed queue. XLA's infeed queue is a single queue that feeds the "XLA virtual machine" with @@ -605,28 +628,29 @@ def transfer_to_infeed(value, replica_number=None): Args: value: the value that the caller would like to enqueue into the XLA infeed queue - replica_number: the replica number to infeed the value to -- if not - provided, then the default replica (trivially replica 0) is used. + device_ordinal: the device to infeed the value to. Each device has a + distinct infeed queue. """ - if replica_number is None: - c_api.TransferToInfeedLocal(require_numpy_array_layout(value)) - else: - c_api.TransferToInfeedLocalReplica( - require_numpy_array_layout(value), replica_number) + # TODO(phawkins): support non-default backends. + backend = _get_default_local_backend() + backend.client.TransferToInfeed( + require_numpy_array_layout(value), device_ordinal) -def transfer_from_outfeed(shape, replica_number=None): - """Transfers a literal of the given shape from replica_number's outfeed. +def transfer_from_outfeed(shape, device_ordinal=0): + """Transfers a literal of the given shape from `device_ordinal`'s outfeed. Args: shape: The shape of the value to transfer from outfeed. - replica_number: The replica number ordinal to transfer the outfeed value - from. (Each replica has a distinct outfeed queue.) + device_ordinal: The device ordinal to transfer the outfeed value from. Each + device has a distinct outfeed queue.. Returns: The literal value that is produced from the outfeed queue. """ - return c_api.TransferFromOutfeedLocalReplica(shape, replica_number or 0) + # TODO(phawkins): support non-default backends. 
+ backend = _get_default_local_backend() + return backend.client.TransferFromOutfeed(shape, device_ordinal) class Computation(object): @@ -673,7 +697,7 @@ class Computation(object): Returns: A Executable instance. """ - backend = backend or self._backend or XLA_LOCAL_BACKEND + backend = backend or self._backend or _get_default_local_backend() result_shape = _wrap_shape(self.computation.GetReturnValueShape()) if layout_fn: @@ -720,15 +744,21 @@ class Executable(object): def __init__(self, c_executable, backend=None): self._c_executable = c_executable + self._device_ordinals = c_executable.DeviceOrdinals() self._backend = backend + def DeviceOrdinals(self): + """Returns a list containing the device ordinals for each replica.""" + return self._device_ordinals + def Execute(self, arguments=(), check_for_deleted_args=True): """Execute on one replica with LocalBuffer arguments and return value.""" if check_for_deleted_args and any(arg.is_deleted() for arg in arguments): raise ValueError('Executing with deleted local buffer argument') raw_args = [arg.c_buffer for arg in arguments] output_buffer = self._backend.execute(self._c_executable, raw_args) - return LocalBuffer(output_buffer, backend=self._backend, replica=0) + return LocalBuffer( + output_buffer, backend=self._backend, device=self._device_ordinals[0]) def ExecutePerReplica(self, arguments=None): """Execute on many replicas with LocalBuffer arguments and return value. @@ -738,12 +768,12 @@ class Executable(object): sequence comprises the arguments for execution on the i'th replica. Returns: - A list of the computation's outputs on each replica, as a LocalBuffer. If + A list of the computation's outputs for each replica, as a LocalBuffer. If a shallow sequence of arguments was passed in for `arguments`, then the sole, zero'th replica's output is returned instead, as a LocalBuffer. 
""" if arguments is None: - arguments = ((),) * get_replica_count() + arguments = ((),) * len(self._device_ordinals) else: arguments = [list(replica_args) for replica_args in arguments] @@ -752,12 +782,13 @@ class Executable(object): for arg in replica_args: if arg.is_deleted(): raise ValueError('Executing with deleted local buffer argument') - if arg.replica() != replica: + if arg.device() != self._device_ordinals[replica]: raise ValueError( - 'Executing on replica {} with argument from replica {}'.format( - replica, arg.replica())) + 'Executing on device {} with argument from device {}'.format( + self._device_ordinals[replica], arg.device())) # Pull out argument buffer handles + # pylint: disable=g-complex-comprehension stripped_args = [ [arg.c_buffer for arg in replica_args] for replica_args in arguments ] @@ -768,14 +799,18 @@ class Executable(object): # Wrap output handles in LocalBuffer instances return tuple( - LocalBuffer(output_buffer, backend=self._backend, replica=replica) + LocalBuffer( + output_buffer, + backend=self._backend, + device=self._device_ordinals[replica]) for replica, output_buffer in enumerate(output_buffers)) def ExecuteWithPythonValues(self, arguments=()): """Execute on one replica with Python values as arguments and output.""" def put(arg): - return LocalBuffer.from_pyval(arg, backend=self._backend) + return LocalBuffer.from_pyval( + arg, device=self._device_ordinals[0], backend=self._backend) arguments = [put(arg) for arg in arguments] return self.Execute(arguments).to_py() @@ -783,12 +818,13 @@ class Executable(object): def ExecuteWithPythonValuesPerReplica(self, arguments): """Execute on many replicas with Python values as arguments and output.""" - def put(arg, replica): - return LocalBuffer.from_pyval(arg, replica, backend=self._backend) + def put(arg, device): + return LocalBuffer.from_pyval(arg, device, backend=self._backend) - arguments = [[put(arg, replica) - for arg in replica_args] - for replica, replica_args in 
enumerate(arguments)] + # pylint: disable=g-complex-comprehension + arguments = [[ + put(arg, self._device_ordinals[replica]) for arg in replica_args + ] for replica, replica_args in enumerate(arguments)] return [out.to_py() for out in self.ExecutePerReplica(arguments)] def __del__(self): @@ -1722,8 +1758,14 @@ def _forward_methods_to_local_builder(): _forward_methods_to_local_builder() +_default_replica_count = 1 + + def initialize_replica_count(replica_count): - """Initializes the desired replica count to use on XLA service init. + """Initializes the default replica count to use. + + Deprecated; pass `num_replicas` as an option to `Computation.Compile()` + instead. Args: replica_count: number of replicas that are desired for set up during XLA @@ -1732,31 +1774,27 @@ def initialize_replica_count(replica_count): Raises: A runtime exception if the XLA service has already been initialized. """ - c_api.InitializeReplicaCount(replica_count) + global _default_replica_count + _default_replica_count = replica_count -def initialize_platform_name(platform_name): - """Initializes the desired platform name to use on XLA service init. - - Args: - platform_name: string name of platform. +def get_replica_count(): + """Returns the default replica count. - Raises: - A runtime exception if the XLA service has already been initialized. - A runtime exception if the platform does not exist, or there are no devices - with that platform. + Deprecated; pass `num_replicas` as an option to `Computation.Compile()` + instead. """ - platform_name = _maybe_encode_string(platform_name) - c_api.InitializePlatformName(platform_name) + return _default_replica_count -def get_replica_count(): - """Returns the current replica count used for the XLA service. +def initialize_platform_name(platform_name): + """Initializes the default platform name to use for XLA. - Note: this will return a value whether the XLA service has been initialized - yet or not. + Args: + platform_name: string name of platform. 
""" - return c_api.GetReplicaCount() + global _default_platform_name + _default_platform_name = platform_name def register_cpu_custom_call_target(name, fn): -- GitLab From d784a48aea16c150e618d5aaf42b320d108dd20d Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Thu, 14 Feb 2019 08:53:16 -0800 Subject: [PATCH 138/351] Fix bug causing inconsistent AST when collections of lambdas are involved in template expansions. PiperOrigin-RevId: 233964704 --- tensorflow/python/autograph/pyct/templates.py | 8 ++++++++ tensorflow/python/autograph/pyct/templates_test.py | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/tensorflow/python/autograph/pyct/templates.py b/tensorflow/python/autograph/pyct/templates.py index 831eb6dbbf..b682a21bec 100644 --- a/tensorflow/python/autograph/pyct/templates.py +++ b/tensorflow/python/autograph/pyct/templates.py @@ -92,6 +92,14 @@ class ContextAdjuster(gast.NodeTransformer): return self.generic_visit(node) def visit_comprehension(self, node): + # We may be able to override some of these, but for now it's simpler + # to just assert that they're set. + self._ctx_override = None + return self.generic_visit(node) + + def visit_Lambda(self, node): + # We may be able to override some of these, but for now it's simpler + # to just assert that they're set. 
self._ctx_override = None return self.generic_visit(node) diff --git a/tensorflow/python/autograph/pyct/templates_test.py b/tensorflow/python/autograph/pyct/templates_test.py index bd6b451eda..4762aaf3ff 100644 --- a/tensorflow/python/autograph/pyct/templates_test.py +++ b/tensorflow/python/autograph/pyct/templates_test.py @@ -248,6 +248,16 @@ class TemplatesTest(test.TestCase): self.assertIsInstance(arg_node.generators[0].target.ctx, gast.Store) self.assertIsInstance(arg_node.elt.ctx, gast.Load) + def test_lambda_in_function_call(self): + template = """ + a = foo(arg) + """ + source = parser.parse_expression('[lambda i: i]') + node = templates.replace(template, arg=source) + lambda_arg = node[0].value.args[0].elts[0] + self.assertIsInstance(lambda_arg.args.args[0].ctx, gast.Param) + self.assertIsInstance(lambda_arg.body.ctx, gast.Load) + if __name__ == '__main__': test.main() -- GitLab From 7f62d5d59d20ecfd9246ec1470d25f3670bb1ff7 Mon Sep 17 00:00:00 2001 From: Andr? Susano Pinto Date: Thu, 14 Feb 2019 09:27:06 -0800 Subject: [PATCH 139/351] Raise NotImplementedError if an input_signature is used in a functools.partial. Using input_signature together with functools.partial breaks in almost every possible way at the moment. PiperOrigin-RevId: 233970596 --- tensorflow/python/eager/function.py | 4 ++++ tensorflow/python/eager/function_test.py | 15 +++++++++++++++ tensorflow/python/saved_model/save.py | 6 ++++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 1b7e751801..d2c3cb824f 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -844,6 +844,10 @@ class FunctionSpec(object): python_function_to_inspect = python_function.func args_to_prepend = python_function.args or tuple() kwargs_to_include = python_function.keywords or {} + if input_signature is not None: + # TODO(b/124441704): Add support for input_signature + partial. 
+ raise NotImplementedError( + "Missing support for input_signature when using partial functions.") else: python_function_to_inspect = python_function args_to_prepend = tuple() diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 7fecf80760..34f0b4479b 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1471,6 +1471,21 @@ class FunctionTest(test.TestCase, parameterized.TestCase): defined([a], [a, a, a]) defined([a, a], [a, a]) + def testInputSignatureWithPartialFunction(self): + self.skipTest('b/124441704') + def full_function(a, b, c=3.0): + return a, b, c + + partial = functools.partial(full_function, 1, c=4) + a, b, c = partial(2.0) + signature = [tensor_spec.TensorSpec([], dtypes.float32)] + defined = function.defun(partial, input_signature=signature) + x = constant_op.constant(2.0) + func_a, func_b, func_c = defined(x) + self.assertEqual(func_a.numpy(), a) + self.assertEqual(func_b.numpy(), b) + self.assertEqual(func_c.numpy(), c) + def testInputSignatureForFunctionWithNonTensorInputsNotAllowed(self): def foo(a, training=True): diff --git a/tensorflow/python/saved_model/save.py b/tensorflow/python/saved_model/save.py index 3180b6f169..db3f2446f2 100644 --- a/tensorflow/python/saved_model/save.py +++ b/tensorflow/python/saved_model/save.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import collections -import functools import os from tensorflow.core.framework import versions_pb2 @@ -421,10 +420,13 @@ def _trace_resource_initializers(accessible_objects): obj.initialize() return constant_op.constant(1.) 
# Dummy control output + def _wrap_obj_initializer(obj): + return lambda: _wrap_initializer(obj) + for obj in accessible_objects: if isinstance(obj, tracking.TrackableResource): resource_initializers.append(def_function.function( - functools.partial(_wrap_initializer, obj), + _wrap_obj_initializer(obj), # All inputs are captures. input_signature=[]).get_concrete_function()) return resource_initializers -- GitLab From 248016edd1555315c4db540e668d42b95292c2c7 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 14 Feb 2019 09:35:56 -0800 Subject: [PATCH 140/351] [XLA:Python] Instantiate the platform in xla_client.initialize_platform_name. This is a backward compatibility measure to make JAX's existing platform fallback logic work. PiperOrigin-RevId: 233972125 --- tensorflow/compiler/xla/python/xla_client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index eb109ec157..020cc587fe 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -1796,6 +1796,9 @@ def initialize_platform_name(platform_name): global _default_platform_name _default_platform_name = platform_name + # Make sure the platform is valid by trying to instantiate it. + _get_default_local_backend() + def register_cpu_custom_call_target(name, fn): """Registers a CPU custom call target. -- GitLab From 3c9b46c245a57df746946403042cca71a94622d2 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Thu, 14 Feb 2019 10:28:30 -0800 Subject: [PATCH 141/351] Add BeamSearchDecoderV2 which can be used as a keras layer. 
PiperOrigin-RevId: 233982439 --- .../kernel_tests/beam_search_decoder_test.py | 126 ++++- .../seq2seq/python/ops/attention_wrapper.py | 19 +- .../seq2seq/python/ops/beam_search_decoder.py | 531 +++++++++++++----- 3 files changed, 528 insertions(+), 148 deletions(-) diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py index 5e28e651c6..56f2a0acc9 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_decoder_test.py @@ -25,10 +25,13 @@ from tensorflow.contrib.seq2seq.python.ops import attention_wrapper from tensorflow.contrib.seq2seq.python.ops import beam_search_decoder from tensorflow.contrib.seq2seq.python.ops import beam_search_ops from tensorflow.contrib.seq2seq.python.ops import decoder +from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.keras import layers from tensorflow.python.layers import core as layers_core from tensorflow.python.ops import array_ops from tensorflow.python.ops import nn_ops @@ -530,11 +533,10 @@ class BeamSearchDecoderTest(test.TestCase): return (shape[1], shape[0]) + shape[2:] return shape - self.assertTrue( - isinstance(final_outputs, - beam_search_decoder.FinalBeamSearchDecoderOutput)) - self.assertTrue( - isinstance(final_state, beam_search_decoder.BeamSearchDecoderState)) + self.assertIsInstance( + final_outputs, beam_search_decoder.FinalBeamSearchDecoderOutput) + self.assertIsInstance( + final_state, beam_search_decoder.BeamSearchDecoderState) beam_search_decoder_output = final_outputs.beam_search_decoder_output self.assertEqual( @@ -574,5 +576,119 @@ class 
BeamSearchDecoderTest(test.TestCase): with_alignment_history=True) +@test_util.run_all_in_graph_and_eager_modes +class BeamSearchDecoderV2Test(test.TestCase): + + def _testDynamicDecodeRNN(self, time_major, has_attention, + with_alignment_history=False): + encoder_sequence_length = np.array([3, 2, 3, 1, 1]) + decoder_sequence_length = np.array([2, 0, 1, 2, 3]) + batch_size = 5 + decoder_max_time = 4 + input_depth = 7 + cell_depth = 9 + attention_depth = 6 + vocab_size = 20 + end_token = vocab_size - 1 + start_token = 0 + embedding_dim = 50 + max_out = max(decoder_sequence_length) + output_layer = layers.Dense(vocab_size, use_bias=True, activation=None) + beam_width = 3 + + with self.cached_session(): + batch_size_tensor = constant_op.constant(batch_size) + embedding = np.random.randn(vocab_size, embedding_dim).astype(np.float32) + cell = rnn_cell.LSTMCell(cell_depth) + initial_state = cell.zero_state(batch_size, dtypes.float32) + coverage_penalty_weight = 0.0 + if has_attention: + coverage_penalty_weight = 0.2 + inputs = array_ops.placeholder_with_default( + np.random.randn(batch_size, decoder_max_time, input_depth).astype( + np.float32), + shape=(None, None, input_depth)) + tiled_inputs = beam_search_decoder.tile_batch( + inputs, multiplier=beam_width) + tiled_sequence_length = beam_search_decoder.tile_batch( + encoder_sequence_length, multiplier=beam_width) + attention_mechanism = attention_wrapper.BahdanauAttention( + num_units=attention_depth, + memory=tiled_inputs, + memory_sequence_length=tiled_sequence_length) + initial_state = beam_search_decoder.tile_batch( + initial_state, multiplier=beam_width) + cell = attention_wrapper.AttentionWrapper( + cell=cell, + attention_mechanism=attention_mechanism, + attention_layer_size=attention_depth, + alignment_history=with_alignment_history) + cell_state = cell.zero_state( + dtype=dtypes.float32, batch_size=batch_size_tensor * beam_width) + if has_attention: + cell_state = cell_state.clone(cell_state=initial_state) + 
bsd = beam_search_decoder.BeamSearchDecoderV2( + cell=cell, + beam_width=beam_width, + output_layer=output_layer, + length_penalty_weight=0.0, + coverage_penalty_weight=coverage_penalty_weight, + output_time_major=time_major, + maximum_iterations=max_out) + + final_outputs, final_state, final_sequence_lengths = bsd( + embedding, + start_tokens=array_ops.fill([batch_size_tensor], start_token), + end_token=end_token, + initial_state=cell_state) + + def _t(shape): + if time_major: + return (shape[1], shape[0]) + shape[2:] + return shape + + self.assertIsInstance( + final_outputs, beam_search_decoder.FinalBeamSearchDecoderOutput) + self.assertIsInstance( + final_state, beam_search_decoder.BeamSearchDecoderState) + + beam_search_decoder_output = final_outputs.beam_search_decoder_output + expected_seq_length = 3 if context.executing_eagerly() else None + self.assertEqual( + _t((batch_size, expected_seq_length, beam_width)), + tuple(beam_search_decoder_output.scores.get_shape().as_list())) + self.assertEqual( + _t((batch_size, expected_seq_length, beam_width)), + tuple(final_outputs.predicted_ids.get_shape().as_list())) + + self.evaluate(variables.global_variables_initializer()) + eval_results = self.evaluate({ + 'final_outputs': final_outputs, + 'final_sequence_lengths': final_sequence_lengths + }) + + max_sequence_length = np.max(eval_results['final_sequence_lengths']) + + # A smoke test + self.assertEqual( + _t((batch_size, max_sequence_length, beam_width)), + eval_results['final_outputs'].beam_search_decoder_output.scores.shape) + self.assertEqual( + _t((batch_size, max_sequence_length, beam_width)), eval_results[ + 'final_outputs'].beam_search_decoder_output.predicted_ids.shape) + + def testDynamicDecodeRNNBatchMajorNoAttention(self): + self._testDynamicDecodeRNN(time_major=False, has_attention=False) + + def testDynamicDecodeRNNBatchMajorYesAttention(self): + self._testDynamicDecodeRNN(time_major=False, has_attention=True) + + def 
testDynamicDecodeRNNBatchMajorYesAttentionWithAlignmentHistory(self): + self._testDynamicDecodeRNN( + time_major=False, + has_attention=True, + with_alignment_history=True) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 5bcf0af889..79c2ac2f50 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -25,6 +25,7 @@ import math import numpy as np from tensorflow.contrib.framework.python.framework import tensor_util +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -1919,7 +1920,15 @@ class AttentionWrapperState( def with_same_shape(old, new): """Check and set new tensor's shape.""" if isinstance(old, ops.Tensor) and isinstance(new, ops.Tensor): - return tensor_util.with_same_shape(old, new) + if not context.executing_eagerly(): + return tensor_util.with_same_shape(old, new) + else: + if old.shape.as_list() != new.shape.as_list(): + raise ValueError("The shape of the AttentionWrapperState is " + "expected to be same as the one to clone. " + "self.shape: %s, input.shape: %s" % + (old.shape, new.shape)) + return new return new return nest.map_structure( @@ -2048,13 +2057,13 @@ def _compute_attention(attention_mechanism, cell_output, attention_state, # the batched matmul is over memory_time, so the output shape is # [batch_size, 1, memory_size]. # we then squeeze out the singleton dim. 
- context = math_ops.matmul(expanded_alignments, attention_mechanism.values) - context = array_ops.squeeze(context, [1]) + context_ = math_ops.matmul(expanded_alignments, attention_mechanism.values) + context_ = array_ops.squeeze(context_, [1]) if attention_layer is not None: - attention = attention_layer(array_ops.concat([cell_output, context], 1)) + attention = attention_layer(array_ops.concat([cell_output, context_], 1)) else: - attention = context + attention = context_ return attention, alignments, next_attention_state diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index 8f8f057702..1d773a4498 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -24,11 +24,12 @@ import numpy as np from tensorflow.contrib.seq2seq.python.ops import attention_wrapper from tensorflow.contrib.seq2seq.python.ops import beam_search_ops from tensorflow.contrib.seq2seq.python.ops import decoder +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util -from tensorflow.python.layers import base as layers_base +from tensorflow.python.keras import layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import embedding_ops @@ -182,11 +183,12 @@ def gather_tree_from_array(t, parent_ids, sequence_length): return ordered -def _check_maybe(t): +def _check_ndims(t): if t.shape.ndims is None: raise ValueError( "Expected tensor (%s) to have known rank, but ndims == None." % t) + def _check_static_batch_beam_maybe(shape, batch_size, beam_width): """Raises an exception if dimensions are known statically and can not be reshaped to [batch_size, beam_size, -1]. 
@@ -205,6 +207,7 @@ def _check_static_batch_beam_maybe(shape, batch_size, beam_width): return False return True + def _check_batch_beam(t, batch_size, beam_width): """Returns an Assert operation checking that the elements of the stacked TensorArray can be reshaped to [batch_size, beam_size, -1]. At this point, @@ -229,70 +232,30 @@ def _check_batch_beam(t, batch_size, beam_width): return control_flow_ops.Assert(condition, [error_message]) +class BeamSearchDecoderMixin(object): + """BeamSearchDecoderMixin contains the common methods for BeamSearchDecoder. -class BeamSearchDecoder(decoder.Decoder): - """BeamSearch sampling decoder. - - **NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped in - `AttentionWrapper`, then you must ensure that: - - - The encoder output has been tiled to `beam_width` via - `tf.contrib.seq2seq.tile_batch` (NOT `tf.tile`). - - The `batch_size` argument passed to the `zero_state` method of this - wrapper is equal to `true_batch_size * beam_width`. - - The initial state created with `zero_state` above contains a - `cell_state` value containing properly tiled final state from the - encoder. - - An example: - - ``` - tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch( - encoder_outputs, multiplier=beam_width) - tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch( - encoder_final_state, multiplier=beam_width) - tiled_sequence_length = tf.contrib.seq2seq.tile_batch( - sequence_length, multiplier=beam_width) - attention_mechanism = MyFavoriteAttentionMechanism( - num_units=attention_depth, - memory=tiled_inputs, - memory_sequence_length=tiled_sequence_length) - attention_cell = AttentionWrapper(cell, attention_mechanism, ...) 
- decoder_initial_state = attention_cell.zero_state( - dtype, batch_size=true_batch_size * beam_width) - decoder_initial_state = decoder_initial_state.clone( - cell_state=tiled_encoder_final_state) - ``` - - Meanwhile, with `AttentionWrapper`, coverage penalty is suggested to use - when computing scores(https://arxiv.org/pdf/1609.08144.pdf). It encourages - the translation to cover all inputs. + It is expected to be used as a base class for concrete BeamSearchDecoder. + Since this is a mixin class, it is expected to be used together with another + class as base. """ def __init__(self, cell, - embedding, - start_tokens, - end_token, - initial_state, beam_width, output_layer=None, length_penalty_weight=0.0, coverage_penalty_weight=0.0, - reorder_tensor_arrays=True): - """Initialize the BeamSearchDecoder. + reorder_tensor_arrays=True, + **kwargs): + """Initialize the BeamSearchDecoderMixin. Args: cell: An `RNNCell` instance. - embedding: A callable that takes a vector tensor of `ids` (argmax ids), - or the `params` argument for `embedding_lookup`. - start_tokens: `int32` vector shaped `[batch_size]`, the start tokens. - end_token: `int32` scalar, the token that marks end of decoding. - initial_state: A (possibly nested tuple of...) tensors and TensorArrays. beam_width: Python integer, the number of beams. - output_layer: (Optional) An instance of `tf.layers.Layer`, i.e., - `tf.layers.Dense`. Optional layer to apply to the RNN output prior - to storing the result or sampling. + output_layer: (Optional) An instance of `tf.keras.layers.Layer`, i.e., + `tf.keras.layers.Dense`. Optional layer to apply to the RNN output + prior to storing the result or sampling. length_penalty_weight: Float weight to penalize length. Disabled with 0.0. coverage_penalty_weight: Float weight to penalize the coverage of source sentence. Disabled with 0.0. @@ -302,59 +265,35 @@ class BeamSearchDecoder(decoder.Decoder): Otherwise, the `TensorArray` will be returned as is.
Set this flag to `False` if the cell state contains `TensorArray`s that are not amenable to reordering. + **kwargs: Dict, other keyword arguments for parent class. Raises: TypeError: if `cell` is not an instance of `RNNCell`, - or `output_layer` is not an instance of `tf.layers.Layer`. - ValueError: If `start_tokens` is not a vector or - `end_token` is not a scalar. + or `output_layer` is not an instance of `tf.keras.layers.Layer`. """ rnn_cell_impl.assert_like_rnncell("cell", cell) # pylint: disable=protected-access if (output_layer is not None and - not isinstance(output_layer, layers_base.Layer)): + not isinstance(output_layer, layers.Layer)): raise TypeError( "output_layer must be a Layer, received: %s" % type(output_layer)) self._cell = cell self._output_layer = output_layer self._reorder_tensor_arrays = reorder_tensor_arrays - if callable(embedding): - self._embedding_fn = embedding - else: - self._embedding_fn = ( - lambda ids: embedding_ops.embedding_lookup(embedding, ids)) - - self._start_tokens = ops.convert_to_tensor( - start_tokens, dtype=dtypes.int32, name="start_tokens") - if self._start_tokens.get_shape().ndims != 1: - raise ValueError("start_tokens must be a vector") - self._end_token = ops.convert_to_tensor( - end_token, dtype=dtypes.int32, name="end_token") - if self._end_token.get_shape().ndims != 0: - raise ValueError("end_token must be a scalar") - - self._batch_size = array_ops.size(start_tokens) + self._start_tokens = None + self._end_token = None + self._batch_size = None self._beam_width = beam_width self._length_penalty_weight = length_penalty_weight self._coverage_penalty_weight = coverage_penalty_weight - self._initial_cell_state = nest.map_structure( - self._maybe_split_batch_beams, initial_state, self._cell.state_size) - self._start_tokens = array_ops.tile( - array_ops.expand_dims(self._start_tokens, 1), [1, self._beam_width]) - self._start_inputs = self._embedding_fn(self._start_tokens) - - self._finished = array_ops.one_hot( - 
array_ops.zeros([self._batch_size], dtype=dtypes.int32), - depth=self._beam_width, - on_value=False, - off_value=True, - dtype=dtypes.bool) + super(BeamSearchDecoderMixin, self).__init__(**kwargs) @property def batch_size(self): return self._batch_size def _rnn_output_size(self): + """Get the output shape from the RNN layer.""" size = self._cell.output_size if self._output_layer is None: return size @@ -393,50 +332,6 @@ class BeamSearchDecoder(decoder.Decoder): predicted_ids=tensor_shape.TensorShape([self._beam_width]), parent_ids=tensor_shape.TensorShape([self._beam_width])) - @property - def output_dtype(self): - # Assume the dtype of the cell is the output_size structure - # containing the input_state's first component's dtype. - # Return that structure and int32 (the id) - dtype = nest.flatten(self._initial_cell_state)[0].dtype - return BeamSearchDecoderOutput( - scores=nest.map_structure(lambda _: dtype, self._rnn_output_size()), - predicted_ids=dtypes.int32, - parent_ids=dtypes.int32) - - def initialize(self, name=None): - """Initialize the decoder. - - Args: - name: Name scope for any created operations. - - Returns: - `(finished, start_inputs, initial_state)`. 
- """ - finished, start_inputs = self._finished, self._start_inputs - - dtype = nest.flatten(self._initial_cell_state)[0].dtype - log_probs = array_ops.one_hot( # shape(batch_sz, beam_sz) - array_ops.zeros([self._batch_size], dtype=dtypes.int32), - depth=self._beam_width, - on_value=ops.convert_to_tensor(0.0, dtype=dtype), - off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype), - dtype=dtype) - init_attention_probs = get_attention_probs( - self._initial_cell_state, self._coverage_penalty_weight) - if init_attention_probs is None: - init_attention_probs = () - - initial_state = BeamSearchDecoderState( - cell_state=self._initial_cell_state, - log_probs=log_probs, - finished=finished, - lengths=array_ops.zeros( - [self._batch_size, self._beam_width], dtype=dtypes.int64), - accumulated_attention_probs=init_attention_probs) - - return (finished, start_inputs, initial_state) - def finalize(self, outputs, final_state, sequence_lengths): """Finalize and return the predicted_ids. @@ -562,7 +457,7 @@ class BeamSearchDecoder(decoder.Decoder): """ if isinstance(t, tensor_array_ops.TensorArray): return t - _check_maybe(t) + _check_ndims(t) if t.shape.ndims >= 1: return self._split_batch_beams(t, s) else: @@ -586,7 +481,7 @@ class BeamSearchDecoder(decoder.Decoder): """ if isinstance(t, tensor_array_ops.TensorArray): return t - _check_maybe(t) + _check_ndims(t) if t.shape.ndims >= 2: return self._merge_batch_beams(t, s) else: @@ -609,11 +504,18 @@ class BeamSearchDecoder(decoder.Decoder): if not isinstance(t, tensor_array_ops.TensorArray): return t # pylint: disable=protected-access - if (not t._infer_shape or not t._element_shape - or t._element_shape[0].ndims is None - or t._element_shape[0].ndims < 1): + # This is a bad hack due to the implementation detail of eager/graph TA. + # TODO(b/124374427): Update this to use public property of TensorArray. 
+ if context.executing_eagerly(): + element_shape = t._element_shape + else: + element_shape = t._element_shape[0] + if (not t._infer_shape + or not t._element_shape + or element_shape.ndims is None + or element_shape.ndims < 1): shape = ( - t._element_shape[0] if t._infer_shape and t._element_shape + element_shape if t._infer_shape and t._element_shape else tensor_shape.TensorShape(None)) tf_logging.warn("The TensorArray %s in the cell state is not amenable to " "sorting based on the beam search result. For a " @@ -621,10 +523,10 @@ class BeamSearchDecoder(decoder.Decoder): "defined and have at least a rank of 1, but saw shape: %s" % (t.handle.name, shape)) return t - shape = t._element_shape[0] # pylint: enable=protected-access if not _check_static_batch_beam_maybe( - shape, tensor_util.constant_value(self._batch_size), self._beam_width): + element_shape, tensor_util.constant_value(self._batch_size), + self._beam_width): return t t = t.stack() with ops.control_dependencies( @@ -684,6 +586,359 @@ class BeamSearchDecoder(decoder.Decoder): return (beam_search_output, beam_search_state, next_inputs, finished) +class BeamSearchDecoder(BeamSearchDecoderMixin, decoder.Decoder): + # Note that the inheritance hierarchy is important here. The Mixin has to be + # the first parent class since we will use super().__init__(), and Mixin which + # is a object will properly invoke the __init__ method of other parent class. + """BeamSearch sampling decoder. + + **NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped in + `AttentionWrapper`, then you must ensure that: + + - The encoder output has been tiled to `beam_width` via + `tf.contrib.seq2seq.tile_batch` (NOT `tf.tile`). + - The `batch_size` argument passed to the `zero_state` method of this + wrapper is equal to `true_batch_size * beam_width`. + - The initial state created with `zero_state` above contains a + `cell_state` value containing properly tiled final state from the + encoder. 
+ + An example: + + ``` + tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch( + encoder_outputs, multiplier=beam_width) + tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch( + encoder_final_state, multiplier=beam_width) + tiled_sequence_length = tf.contrib.seq2seq.tile_batch( + sequence_length, multiplier=beam_width) + attention_mechanism = MyFavoriteAttentionMechanism( + num_units=attention_depth, + memory=tiled_inputs, + memory_sequence_length=tiled_sequence_length) + attention_cell = AttentionWrapper(cell, attention_mechanism, ...) + decoder_initial_state = attention_cell.zero_state( + dtype, batch_size=true_batch_size * beam_width) + decoder_initial_state = decoder_initial_state.clone( + cell_state=tiled_encoder_final_state) + ``` + + Meanwhile, with `AttentionWrapper`, coverage penalty is suggested to use + when computing scores (https://arxiv.org/pdf/1609.08144.pdf). It encourages + the decoder to cover all inputs. + """ + + def __init__(self, + cell, + embedding, + start_tokens, + end_token, + initial_state, + beam_width, + output_layer=None, + length_penalty_weight=0.0, + coverage_penalty_weight=0.0, + reorder_tensor_arrays=True): + """Initialize the BeamSearchDecoder. + + Args: + cell: An `RNNCell` instance. + embedding: A callable that takes a vector tensor of `ids` (argmax ids), + or the `params` argument for `embedding_lookup`. + start_tokens: `int32` vector shaped `[batch_size]`, the start tokens. + end_token: `int32` scalar, the token that marks end of decoding. + initial_state: A (possibly nested tuple of...) tensors and TensorArrays. + beam_width: Python integer, the number of beams. + output_layer: (Optional) An instance of `tf.keras.layers.Layer`, i.e., + `tf.keras.layers.Dense`. Optional layer to apply to the RNN output + prior to storing the result or sampling. + length_penalty_weight: Float weight to penalize length. Disabled with 0.0. + coverage_penalty_weight: Float weight to penalize the coverage of source + sentence. 
Disabled with 0.0. + reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell + state will be reordered according to the beam search path. If the + `TensorArray` can be reordered, the stacked form will be returned. + Otherwise, the `TensorArray` will be returned as is. Set this flag to + `False` if the cell state contains `TensorArray`s that are not amenable + to reordering. + + Raises: + TypeError: if `cell` is not an instance of `RNNCell`, + or `output_layer` is not an instance of `tf.keras.layers.Layer`. + ValueError: If `start_tokens` is not a vector or + `end_token` is not a scalar. + """ + super(BeamSearchDecoder, self).__init__( + cell, + beam_width, + output_layer=output_layer, + length_penalty_weight=length_penalty_weight, + coverage_penalty_weight=coverage_penalty_weight, + reorder_tensor_arrays=reorder_tensor_arrays) + + if callable(embedding): + self._embedding_fn = embedding + else: + self._embedding_fn = ( + lambda ids: embedding_ops.embedding_lookup(embedding, ids)) + + self._start_tokens = ops.convert_to_tensor( + start_tokens, dtype=dtypes.int32, name="start_tokens") + if self._start_tokens.get_shape().ndims != 1: + raise ValueError("start_tokens must be a vector") + self._end_token = ops.convert_to_tensor( + end_token, dtype=dtypes.int32, name="end_token") + if self._end_token.get_shape().ndims != 0: + raise ValueError("end_token must be a scalar") + + self._batch_size = array_ops.size(start_tokens) + self._initial_cell_state = nest.map_structure( + self._maybe_split_batch_beams, initial_state, self._cell.state_size) + self._start_tokens = array_ops.tile( + array_ops.expand_dims(self._start_tokens, 1), [1, self._beam_width]) + self._start_inputs = self._embedding_fn(self._start_tokens) + + self._finished = array_ops.one_hot( + array_ops.zeros([self._batch_size], dtype=dtypes.int32), + depth=self._beam_width, + on_value=False, + off_value=True, + dtype=dtypes.bool) + + def initialize(self, name=None): + """Initialize the decoder. 
+ + Args: + name: Name scope for any created operations. + + Returns: + `(finished, start_inputs, initial_state)`. + """ + finished, start_inputs = self._finished, self._start_inputs + + dtype = nest.flatten(self._initial_cell_state)[0].dtype + log_probs = array_ops.one_hot( # shape(batch_sz, beam_sz) + array_ops.zeros([self._batch_size], dtype=dtypes.int32), + depth=self._beam_width, + on_value=ops.convert_to_tensor(0.0, dtype=dtype), + off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype), + dtype=dtype) + init_attention_probs = get_attention_probs( + self._initial_cell_state, self._coverage_penalty_weight) + if init_attention_probs is None: + init_attention_probs = () + + initial_state = BeamSearchDecoderState( + cell_state=self._initial_cell_state, + log_probs=log_probs, + finished=finished, + lengths=array_ops.zeros( + [self._batch_size, self._beam_width], dtype=dtypes.int64), + accumulated_attention_probs=init_attention_probs) + + return (finished, start_inputs, initial_state) + + @property + def output_dtype(self): + # Assume the dtype of the cell is the output_size structure + # containing the input_state's first component's dtype. + # Return that structure and int32 (the id) + dtype = nest.flatten(self._initial_cell_state)[0].dtype + return BeamSearchDecoderOutput( + scores=nest.map_structure(lambda _: dtype, self._rnn_output_size()), + predicted_ids=dtypes.int32, + parent_ids=dtypes.int32) + + +class BeamSearchDecoderV2(BeamSearchDecoderMixin, decoder.BaseDecoder): + # Note that the inheritance hierarchy is important here. The Mixin has to be + # the first parent class since we will use super().__init__(), and Mixin which + # is a object will properly invoke the __init__ method of other parent class. + """BeamSearch sampling decoder. 
+ + **NOTE** If you are using the `BeamSearchDecoder` with a cell wrapped in + `AttentionWrapper`, then you must ensure that: + + - The encoder output has been tiled to `beam_width` via + `tf.contrib.seq2seq.tile_batch` (NOT `tf.tile`). + - The `batch_size` argument passed to the `zero_state` method of this + wrapper is equal to `true_batch_size * beam_width`. + - The initial state created with `zero_state` above contains a + `cell_state` value containing properly tiled final state from the + encoder. + + An example: + + ``` + tiled_encoder_outputs = tf.contrib.seq2seq.tile_batch( + encoder_outputs, multiplier=beam_width) + tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch( + encoder_final_state, multiplier=beam_width) + tiled_sequence_length = tf.contrib.seq2seq.tile_batch( + sequence_length, multiplier=beam_width) + attention_mechanism = MyFavoriteAttentionMechanism( + num_units=attention_depth, + memory=tiled_inputs, + memory_sequence_length=tiled_sequence_length) + attention_cell = AttentionWrapper(cell, attention_mechanism, ...) + decoder_initial_state = attention_cell.zero_state( + dtype, batch_size=true_batch_size * beam_width) + decoder_initial_state = decoder_initial_state.clone( + cell_state=tiled_encoder_final_state) + ``` + + Meanwhile, with `AttentionWrapper`, coverage penalty is suggested to use + when computing scores (https://arxiv.org/pdf/1609.08144.pdf). It encourages + the decoding to cover all inputs. + """ + + def __init__(self, + cell, + beam_width, + embedding_fn=None, + output_layer=None, + length_penalty_weight=0.0, + coverage_penalty_weight=0.0, + reorder_tensor_arrays=True, + **kwargs): + """Initialize the BeamSearchDecoderV2. + + Args: + cell: An `RNNCell` instance. + beam_width: Python integer, the number of beams. + embedding_fn: A callable that takes a vector tensor of `ids` (argmax ids). + output_layer: (Optional) An instance of `tf.keras.layers.Layer`, i.e., + `tf.keras.layers.Dense`. 
Optional layer to apply to the RNN output + prior to storing the result or sampling. + length_penalty_weight: Float weight to penalize length. Disabled with 0.0. + coverage_penalty_weight: Float weight to penalize the coverage of source + sentence. Disabled with 0.0. + reorder_tensor_arrays: If `True`, `TensorArray`s' elements within the cell + state will be reordered according to the beam search path. If the + `TensorArray` can be reordered, the stacked form will be returned. + Otherwise, the `TensorArray` will be returned as is. Set this flag to + `False` if the cell state contains `TensorArray`s that are not amenable + to reordering. + **kwargs: Dict, other keyword arguments for initialization. + + Raises: + TypeError: if `cell` is not an instance of `RNNCell`, + or `output_layer` is not an instance of `tf.keras.layers.Layer`. + """ + super(BeamSearchDecoderV2, self).__init__( + cell, + beam_width, + output_layer=output_layer, + length_penalty_weight=length_penalty_weight, + coverage_penalty_weight=coverage_penalty_weight, + reorder_tensor_arrays=reorder_tensor_arrays, + **kwargs) + + if embedding_fn is None or callable(embedding_fn): + self._embedding_fn = embedding_fn + else: + raise ValueError("embedding_fn is expected to be a callable, got %s" % + type(embedding_fn)) + + def initialize(self, + embedding, + start_tokens, + end_token, + initial_state): + """Initialize the decoder. + + Args: + embedding: A tensor from the embedding layer output, which is the + `params` argument for `embedding_lookup`. + start_tokens: `int32` vector shaped `[batch_size]`, the start tokens. + end_token: `int32` scalar, the token that marks end of decoding. + initial_state: A (possibly nested tuple of...) tensors and TensorArrays. + Returns: + `(finished, start_inputs, initial_state)`. + Raises: + ValueError: If `start_tokens` is not a vector or `end_token` is not a + scalar. 
+ """ + if embedding is not None and self._embedding_fn is not None: + raise ValueError( + "embedding and embedding_fn cannot be provided at same time") + elif embedding is not None: + self._embedding_fn = ( + lambda ids: embedding_ops.embedding_lookup(embedding, ids)) + + self._start_tokens = ops.convert_to_tensor( + start_tokens, dtype=dtypes.int32, name="start_tokens") + if self._start_tokens.get_shape().ndims != 1: + raise ValueError("start_tokens must be a vector") + self._end_token = ops.convert_to_tensor( + end_token, dtype=dtypes.int32, name="end_token") + if self._end_token.get_shape().ndims != 0: + raise ValueError("end_token must be a scalar") + + self._batch_size = array_ops.size(start_tokens) + self._initial_cell_state = nest.map_structure( + self._maybe_split_batch_beams, initial_state, self._cell.state_size) + self._start_tokens = array_ops.tile( + array_ops.expand_dims(self._start_tokens, 1), [1, self._beam_width]) + self._start_inputs = self._embedding_fn(self._start_tokens) + + self._finished = array_ops.one_hot( + array_ops.zeros([self._batch_size], dtype=dtypes.int32), + depth=self._beam_width, + on_value=False, + off_value=True, + dtype=dtypes.bool) + + finished, start_inputs = self._finished, self._start_inputs + + dtype = nest.flatten(self._initial_cell_state)[0].dtype + log_probs = array_ops.one_hot( # shape(batch_sz, beam_sz) + array_ops.zeros([self._batch_size], dtype=dtypes.int32), + depth=self._beam_width, + on_value=ops.convert_to_tensor(0.0, dtype=dtype), + off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype), + dtype=dtype) + init_attention_probs = get_attention_probs( + self._initial_cell_state, self._coverage_penalty_weight) + if init_attention_probs is None: + init_attention_probs = () + + initial_state = BeamSearchDecoderState( + cell_state=self._initial_cell_state, + log_probs=log_probs, + finished=finished, + lengths=array_ops.zeros( + [self._batch_size, self._beam_width], dtype=dtypes.int64), + 
accumulated_attention_probs=init_attention_probs) + + return (finished, start_inputs, initial_state) + + @property + def output_dtype(self): + # Assume the dtype of the cell is the output_size structure + # containing the input_state's first component's dtype. + # Return that structure and int32 (the id) + dtype = nest.flatten(self._initial_cell_state)[0].dtype + return BeamSearchDecoderOutput( + scores=nest.map_structure(lambda _: dtype, self._rnn_output_size()), + predicted_ids=dtypes.int32, + parent_ids=dtypes.int32) + + def call(self, embeddning, start_tokens, end_token, initial_state, **kwargs): + init_kwargs = kwargs + init_kwargs["start_tokens"] = start_tokens + init_kwargs["end_token"] = end_token + init_kwargs["initial_state"] = initial_state + return decoder.dynamic_decode(self, + output_time_major=self.output_time_major, + impute_finished=self.impute_finished, + maximum_iterations=self.maximum_iterations, + parallel_iterations=self.parallel_iterations, + swap_memory=self.swap_memory, + decoder_init_input=embeddning, + decoder_init_kwargs=init_kwargs) + + def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size, beam_width, end_token, length_penalty_weight, coverage_penalty_weight): @@ -1068,7 +1323,7 @@ def _maybe_tensor_gather_helper(gather_indices, gather_from, batch_size, """ if isinstance(gather_from, tensor_array_ops.TensorArray): return gather_from - _check_maybe(gather_from) + _check_ndims(gather_from) if gather_from.shape.ndims >= len(gather_shape): return _tensor_gather_helper( gather_indices=gather_indices, -- GitLab From 8aa71253c73992ae7ec1a5cfc59d469fcdc06d11 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 14 Feb 2019 10:33:15 -0800 Subject: [PATCH 142/351] Add an imported representation of init ops for 1.x-style SavedModels Adds TrackableAssets to a property of the root object, and adds a TrackableResource which runs a function containing the original init op. 
This allows repeated re-export of 1.x SavedModels without losing information about assets or initialization. Uniquifies shared_names rather than clearing them. 1.x SavedModels rely on shared_names sharing resources across functions (e.g. the init function vs. the function that uses a table) so clearing doesn't really work. This will leak cached kernels, but at least provides the right behavior. PiperOrigin-RevId: 233983446 --- .../saved_model/function_deserialization.py | 13 +++++-- .../python/saved_model/load_v1_in_v2.py | 38 ++++++++++++++++++- .../python/saved_model/load_v1_in_v2_test.py | 11 ++++-- 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/saved_model/function_deserialization.py b/tensorflow/python/saved_model/function_deserialization.py index 8d14340c07..4be04aa15e 100644 --- a/tensorflow/python/saved_model/function_deserialization.py +++ b/tensorflow/python/saved_model/function_deserialization.py @@ -212,8 +212,9 @@ def load_function_def_library(library): """ functions = {} + load_shared_name_suffix = "_load_{}".format(ops.uid()) for fdef in _sort_function_defs(library): - copy = _fix_fdef(fdef, functions) + copy = _fix_fdef(fdef, functions, load_shared_name_suffix) func_graph = function_def_lib.function_def_to_graph(copy) for dep in _list_function_deps(fdef): @@ -263,7 +264,7 @@ def _sort_function_defs(library): return [reverse[x] for x in output] -def _fix_fdef(orig_fdef, functions): +def _fix_fdef(orig_fdef, functions, shared_name_suffix): """Fixes a FunctionDef proto to be loaded in current context. In particular, when loading a function library into an eager context, one @@ -272,6 +273,10 @@ def _fix_fdef(orig_fdef, functions): Args: orig_fdef: FunctionDef proto to fix. It is not modified. functions: map from function name to a ConcreteFunction instance. + shared_name_suffix: A unique string for this load which helps to avoid + `shared_name` collisions across loads. 
Two functions from the same load + using the same `shared_name` still need to share, but functions from + different loads with the same `shared_name` should not. Returns: A fixed copy of the original FunctionDef. @@ -296,10 +301,10 @@ def _fix_fdef(orig_fdef, functions): attr_value.func.name = functions[attr_value.func.name].name # TODO(b/124205571): Avoid accidental sharing and destruction of restored - # resources. For now drop "shared_name" when loading functions to avoid + # resources. For now uniquify "shared_name" when loading functions to avoid # sharing. if "shared_name" in node_def.attr: - del node_def.attr["shared_name"] + node_def.attr["shared_name"].s += compat.as_bytes(shared_name_suffix) fdef.signature.name = _clean_function_name(fdef.signature.name) return fdef diff --git a/tensorflow/python/saved_model/load_v1_in_v2.py b/tensorflow/python/saved_model/load_v1_in_v2.py index bba20541ea..7f011919b9 100644 --- a/tensorflow/python/saved_model/load_v1_in_v2.py +++ b/tensorflow/python/saved_model/load_v1_in_v2.py @@ -22,12 +22,41 @@ import functools from tensorflow.python.eager import wrap_function from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops from tensorflow.python.saved_model import loader_impl from tensorflow.python.saved_model import signature_serialization from tensorflow.python.training import saver as tf_saver from tensorflow.python.training.checkpointable import tracking +class _Initializer(tracking.TrackableResource): + """Represents an initialization operation restored from a SavedModel. + + Without this object re-export of imported 1.x SavedModels would omit the + original SavedModel's initialization procedure. + + Created when `tf.saved_model.load` loads a TF 1.x-style SavedModel with an + initialization op. This object holds a function which runs the + initialization. 
It does not require any manual user intervention; + `tf.saved_model.save` will see this object and automatically add it to the + exported SavedModel, and `tf.saved_model.load` runs the initialization + function automatically. + """ + + def __init__(self, init_fn, asset_paths): + super(_Initializer, self).__init__() + self._asset_paths = asset_paths + self._init_fn = init_fn + + def create_resource(self): + return array_ops.placeholder( + dtype=dtypes.resource, shape=[], name="unused_resource") + + def initialize(self): + self._init_fn(*[path.asset_path for path in self._asset_paths]) + + class _EagerSavedModelLoader(loader_impl.SavedModelLoader): """Loads a SavedModel without using Sessions.""" @@ -94,6 +123,7 @@ class _EagerSavedModelLoader(loader_impl.SavedModelLoader): self.restore_variables(wrapped, saver) with wrapped.graph.as_default(): init_op = loader_impl.get_init_op(meta_graph_def) + root = tracking.AutoCheckpointable() if init_op is not None: asset_feed_tensors = [] asset_paths = [] @@ -104,9 +134,13 @@ class _EagerSavedModelLoader(loader_impl.SavedModelLoader): init_fn = wrapped.prune( feeds=asset_feed_tensors, fetches=[wrapped.graph.as_graph_element(init_op)]) - init_fn(*[path.asset_path for path in asset_paths]) + initializer = _Initializer(init_fn, asset_paths) + initializer.initialize() + root.initializer = initializer + root.asset_paths = asset_paths + else: + root.asset_paths = [] signature_functions = self._extract_signatures(wrapped, meta_graph_def) - root = tracking.AutoCheckpointable() root.signatures = signature_serialization.create_signature_map( signature_functions) root.variables = list(wrapped.graph.variables) diff --git a/tensorflow/python/saved_model/load_v1_in_v2_test.py b/tensorflow/python/saved_model/load_v1_in_v2_test.py index 99d2495292..e8bd61a8c3 100644 --- a/tensorflow/python/saved_model/load_v1_in_v2_test.py +++ b/tensorflow/python/saved_model/load_v1_in_v2_test.py @@ -192,14 +192,19 @@ class LoadTest(test.TestCase): 
str(ops.uid())) save.save(imported, second_path, signatures=imported.signatures) shutil.rmtree(first_path) - self.skipTest( - "TODO(b/124321570): save TrackableAssets and make re-saving initialize " - "correctly") second_import = load.load(second_path) fn = second_import.signatures["serving_default"] self.assertAllClose({"output": [2, 0]}, fn(start=constant_op.constant(["gamma", "alpha"]))) + third_path = os.path.join(self.get_temp_dir(), "saved_model", + str(ops.uid())) + save.save(second_import, third_path, signatures=second_import.signatures) + shutil.rmtree(second_path) + third_import = load.load(third_path) + fn = third_import.signatures["serving_default"] + self.assertAllClose({"output": [2, 0]}, + fn(start=constant_op.constant(["gamma", "alpha"]))) if __name__ == "__main__": test.main() -- GitLab From 3d89145866b78b3ea5849e592ad9c61cf6ed91e5 Mon Sep 17 00:00:00 2001 From: Peng Wang Date: Thu, 14 Feb 2019 10:46:15 -0800 Subject: [PATCH 143/351] Fixed a bug where compute_gradient_v2 didn't get added to python/__init__.py . 
PiperOrigin-RevId: 233986266 --- tensorflow/python/__init__.py | 1 + tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index 398fb375e1..8538f8c5dc 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -84,6 +84,7 @@ from tensorflow.python.feature_column import feature_column_lib as feature_colum from tensorflow.python.layers import layers from tensorflow.python.module import module from tensorflow.python.ops import bitwise_ops as bitwise +from tensorflow.python.ops import gradient_checker_v2 from tensorflow.python.ops import image_ops as image from tensorflow.python.ops import manip_ops as manip from tensorflow.python.ops import metrics diff --git a/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt index 3c77788cb0..ac9dd8f718 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.test.pbtxt @@ -16,6 +16,10 @@ tf_module { name: "benchmark_config" argspec: "args=[], varargs=None, keywords=None, defaults=None" } + member_method { + name: "compute_gradient" + argspec: "args=[\'f\', \'x\', \'delta\'], varargs=None, keywords=None, defaults=[\'0.001\'], " + } member_method { name: "create_local_cluster" argspec: "args=[\'num_workers\', \'num_ps\', \'protocol\', \'worker_config\', \'ps_config\'], varargs=None, keywords=None, defaults=[\'grpc\', \'None\', \'None\'], " -- GitLab From b8392684999b738d0d4cb367381075dc3b925fef Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Thu, 14 Feb 2019 11:20:58 -0800 Subject: [PATCH 144/351] Change deprecated setHalf2Mode -> setFp16Mode. Also allow INT8 mode to use FP16 kernels when they are more performant.
--- tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 002526c04b..4cc72e50b2 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -3770,8 +3770,11 @@ tensorflow::Status ConvertGraphDefToEngine( builder->setMaxWorkspaceSize(max_workspace_size_bytes); builder->setGpuAllocator(allocator); if (precision_mode == TrtPrecisionMode::FP16) { - builder->setHalf2Mode(true); + builder->setFp16Mode(true); } else if (precision_mode == TrtPrecisionMode::INT8) { + // Setting FP16 mode as well allows TRT to also consider FP16 kernels and + // use them in situations where they are faster than INT8. + builder->setFp16Mode(true); builder->setInt8Mode(true); if (use_calibration) { builder->setInt8Calibrator(calibrator); -- GitLab From 58582af14c5ae47def8ff807310895c9dfbc4964 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 10:47:38 -0800 Subject: [PATCH 145/351] Fix the TF_NIGHTLY_REGEX to allow for cpu_py34 2.0 preview build to pass. 
PiperOrigin-RevId: 233986567 --- tensorflow/tools/ci_build/copy_binary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py index 40a7443745..aec1d7e28d 100755 --- a/tensorflow/tools/ci_build/copy_binary.py +++ b/tensorflow/tools/ci_build/copy_binary.py @@ -32,7 +32,7 @@ import shutil import tempfile import zipfile -TF_NIGHTLY_REGEX = (r"(.+)tf_nightly(|_gpu)-(\d\.[\d]{1,2}" +TF_NIGHTLY_REGEX = (r"(.+)tf_nightly(|_gpu)(?:_2.0_preview?)?-(\d\.[\d]{1,2}" r"\.\d.dev[\d]{0,8})-(.+)\.whl") BINARY_STRING_TEMPLATE = "%s-%s-%s.whl" -- GitLab From 95e5de50f05a10422a4b84d2f647e871aaffafc2 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Thu, 14 Feb 2019 11:24:29 -0800 Subject: [PATCH 146/351] Improve comment --- tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc index 4cc72e50b2..d814094919 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc @@ -3773,7 +3773,8 @@ tensorflow::Status ConvertGraphDefToEngine( builder->setFp16Mode(true); } else if (precision_mode == TrtPrecisionMode::INT8) { // Setting FP16 mode as well allows TRT to also consider FP16 kernels and - // use them in situations where they are faster than INT8. + // use them in situations where they are faster than INT8 or where INT8 is + // not supported for a given layer. builder->setFp16Mode(true); builder->setInt8Mode(true); if (use_calibration) { -- GitLab From b7f91e57d81b6ed1e84bd589f7f847ce76959e09 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Thu, 14 Feb 2019 10:48:10 -0800 Subject: [PATCH 147/351] [XLA] Parameter_replication annotation. 
Use a list of booleans to represent whether each leaf buffer will receive the same data in data parallelism. PiperOrigin-RevId: 233986693 --- tensorflow/compiler/xla/service/hlo.proto | 5 +- .../compiler/xla/service/hlo_instruction.cc | 17 +++++ .../compiler/xla/service/hlo_instruction.h | 10 +++ .../compiler/xla/service/hlo_instructions.cc | 21 ++++++ .../compiler/xla/service/hlo_instructions.h | 22 ++++++ tensorflow/compiler/xla/service/hlo_parser.cc | 74 +++++++++++++++++++ tensorflow/compiler/xla/service/hlo_parser.h | 11 ++- .../compiler/xla/service/hlo_parser_test.cc | 24 ++++++ tensorflow/compiler/xla/xla_data.proto | 12 +++ 9 files changed, 192 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index d2c995d87a..ae9e3169fd 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto"; option cc_enable_arenas = true; // Serialization of HloInstruction. -// Next ID: 61 +// Next ID: 62 message HloInstructionProto { reserved 10; reserved "parameter_name"; @@ -199,6 +199,9 @@ message HloInstructionProto { // Options for TriangularSolve xla.TriangularSolveOptions triangular_solve_options = 59; + + // Describes how parameters behave with regards to replicas. + xla.ParameterReplication parameter_replication = 61; } // Serialization of HloComputation. 
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 8ece90e05c..33c2270eb0 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -304,6 +304,10 @@ StatusOr> HloInstruction::CreateFromProto( case HloOpcode::kParameter: instruction = CreateParameter(proto.parameter_number(), shape, proto.name()); + if (!proto.parameter_replication().replicated_at_leaf_buffers().empty()) { + instruction->set_parameter_replicated_at_leaf_buffers( + proto.parameter_replication().replicated_at_leaf_buffers()); + } break; case HloOpcode::kGetTupleElement: TF_RET_CHECK(proto.operand_ids_size() == 1) @@ -3322,6 +3326,19 @@ int64 HloInstruction::parameter_number() const { return Cast(this)->parameter_number(); } +void HloInstruction::set_parameter_replicated_at_leaf_buffers( + absl::Span parameter_replicated_at_leaf_buffers) { + return Cast(this) + ->set_parameter_replicated_at_leaf_buffers( + parameter_replicated_at_leaf_buffers); +} + +const absl::optional>& +HloInstruction::parameter_replicated_at_leaf_buffers() const { + return Cast(this) + ->parameter_replicated_at_leaf_buffers(); +} + int64 HloInstruction::tuple_index() const { return Cast(this)->tuple_index(); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 8470cf7ec5..4c2ccfbb60 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -47,6 +47,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_sharding.h" #include "tensorflow/compiler/xla/service/name_uniquer.h" +#include "tensorflow/compiler/xla/shape_tree.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" @@ -1468,6 +1469,15 @@ class HloInstruction { // Delegates to HloParameterInstruction::parameter_number. int64 parameter_number() const; + // Delegates to + // HloParameterInstruction::set_parameter_replicated_at_leaf_buffers. + void set_parameter_replicated_at_leaf_buffers( + absl::Span parameter_replicated_at_leaf_buffers); + + // Delegates to HloParameterInstruction::parameter_replicated_at_leaf_buffers. + const absl::optional>& + parameter_replicated_at_leaf_buffers() const; + // Delegates to HloGetTupleElementInstruction::tuple_index. int64 tuple_index() const; diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 7c8d98b429..f42b4afa5d 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -1535,9 +1535,30 @@ HloParameterInstruction::HloParameterInstruction(int64 parameter_number, HloInstructionProto HloParameterInstruction::ToProto() const { HloInstructionProto proto = HloInstruction::ToProto(); proto.set_parameter_number(parameter_number_); + if (parameter_replicated_at_leaf_buffers_) { + for (bool replicated : *parameter_replicated_at_leaf_buffers_) { + proto.mutable_parameter_replication()->add_replicated_at_leaf_buffers( + replicated); + } + } return proto; } +std::vector HloParameterInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& /*options*/) const { + std::vector result; + if (!parameter_replicated_at_leaf_buffers_) { + return result; + } + std::vector buffers_replicated_strs; + for (bool replicated : *parameter_replicated_at_leaf_buffers_) { 
+ buffers_replicated_strs.push_back(replicated ? "true" : "false"); + } + result.push_back(StrCat("parameter_replication={", + StrJoin(buffers_replicated_strs, ","), "}")); + return result; +} + string HloParameterInstruction::OperandsToStringWithCanonicalNameMap( const HloPrintOptions& options, CanonicalNameMap* canonical_name_map) const { diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 8bb37ab435..4d23cb671f 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -817,10 +817,28 @@ class HloParameterInstruction : public HloInstruction { explicit HloParameterInstruction(int64 parameter_number, const Shape& shape, const string& name); int64 parameter_number() const { return parameter_number_; } + + // Sets and gets the whether all replicas will receive the same parameter data + // for each leaf buffer in data parallelism. + void set_parameter_replicated_at_leaf_buffers( + absl::Span parameter_replicated_at_leaf_buffers) { + CHECK_EQ(ShapeUtil::GetLeafCount(shape()), + parameter_replicated_at_leaf_buffers.size()); + parameter_replicated_at_leaf_buffers_.emplace( + parameter_replicated_at_leaf_buffers.begin(), + parameter_replicated_at_leaf_buffers.end()); + } + const absl::optional>& + parameter_replicated_at_leaf_buffers() const { + return parameter_replicated_at_leaf_buffers_; + } + // Returns a serialized representation of this instruction. HloInstructionProto ToProto() const override; private: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; bool IdenticalSlowPath( const HloInstruction& other, const std::function& @@ -834,6 +852,10 @@ class HloParameterInstruction : public HloInstruction { HloCloneContext* context) const override; int64 parameter_number_ = 0; + + // Specifies whether each buffer has the same parameter value on all replicas + // in data parallelism. 
+ absl::optional> parameter_replicated_at_leaf_buffers_; }; class HloGetTupleElementInstruction : public HloInstruction { diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index b8e699fee2..f448571082 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -82,6 +82,7 @@ class HloParser { // Stand alone parsing utils for various aggregate data types. StatusOr ParseShapeOnly(); StatusOr ParseShardingOnly(); + StatusOr> ParseParameterReplicationOnly(); StatusOr ParseWindowOnly(); StatusOr ParseConvolutionDimensionNumbersOnly(); StatusOr ParsePaddingConfigOnly(); @@ -183,6 +184,7 @@ class HloParser { kWindow, kConvolutionDimensionNumbers, kSharding, + kParameterReplication, kInstructionList, kSliceRanges, kPaddingConfig, @@ -247,6 +249,7 @@ class HloParser { bool ParseMetadata(OpMetadata* metadata); bool ParseSharding(OpSharding* sharding); bool ParseSingleSharding(OpSharding* sharding, bool lbrace_pre_lexed); + bool ParseParameterReplication(ParameterReplication* parameter_replication); // Parses the metadata behind a kDOmain instruction. 
bool ParseDomain(DomainData* domain); @@ -644,6 +647,10 @@ bool HloParser::ParseInstructionRhs(HloComputation::Builder* builder, std::unordered_map attrs; optional sharding; attrs["sharding"] = {/*required=*/false, AttrTy::kSharding, &sharding}; + optional parameter_replication; + attrs["parameter_replication"] = {/*required=*/false, + AttrTy::kParameterReplication, + ¶meter_replication}; optional> predecessors; attrs["control-predecessors"] = {/*required=*/false, AttrTy::kInstructionList, &predecessors}; @@ -1678,6 +1685,18 @@ bool HloParser::ParseInstructionRhs(HloComputation::Builder* builder, instruction->set_sharding( HloSharding::FromProto(sharding.value()).ValueOrDie()); } + if (parameter_replication) { + int leaf_count = ShapeUtil::GetLeafCount(instruction->shape()); + const auto& replicated = + parameter_replication->replicated_at_leaf_buffers(); + if (leaf_count != replicated.size()) { + return Error(lexer_.GetLoc(), + StrCat("parameter has ", leaf_count, + " leaf buffers, but parameter_replication has ", + replicated.size(), " elements.")); + } + instruction->set_parameter_replicated_at_leaf_buffers(replicated); + } if (predecessors) { for (auto* pre : *predecessors) { Status status = pre->AddControlDependencyTo(instruction); @@ -1837,6 +1856,32 @@ bool HloParser::ParseSingleSharding(OpSharding* sharding, return true; } +// parameter_replication ::= +// '{' ('true' | 'false')* (',' ('true' | 'false'))* '}' +bool HloParser::ParseParameterReplication( + ParameterReplication* parameter_replication) { + if (!ParseToken(TokKind::kLbrace, + "expected '{' to start parameter_replication attribute")) { + return false; + } + + if (lexer_.GetKind() != TokKind::kRbrace) { + do { + if (lexer_.GetKind() == TokKind::kw_true) { + parameter_replication->add_replicated_at_leaf_buffers(true); + } else if (lexer_.GetKind() == TokKind::kw_false) { + parameter_replication->add_replicated_at_leaf_buffers(false); + } else { + return false; + } + lexer_.Lex(); + } while 
(EatIfPresent(TokKind::kComma)); + } + + return ParseToken(TokKind::kRbrace, + "expected '}' to end parameter_replication attribute"); +} + // domain ::= '{' 'kind=' domain_kind ',' 'entry=' entry_sharding ',' // 'exit=' exit_sharding '}' bool HloParser::ParseDomain(DomainData* domain) { @@ -2687,6 +2732,15 @@ bool HloParser::ParseAttributeHelper( static_cast*>(attr_out_ptr)->emplace(sharding); return true; } + case AttrTy::kParameterReplication: { + ParameterReplication parameter_replication; + if (!ParseParameterReplication(¶meter_replication)) { + return false; + } + static_cast*>(attr_out_ptr) + ->emplace(parameter_replication); + return true; + } case AttrTy::kInstructionList: { std::vector result; if (!ParseInstructionNames(&result)) { @@ -3788,6 +3842,21 @@ StatusOr HloParser::ParseShardingOnly() { return HloSharding::FromProto(op_sharding); } +StatusOr> HloParser::ParseParameterReplicationOnly() { + lexer_.Lex(); + ParameterReplication parameter_replication; + if (!ParseParameterReplication(¶meter_replication)) { + return InvalidArgument("Syntax error:\n%s", GetError()); + } + if (lexer_.GetKind() != TokKind::kEof) { + return InvalidArgument( + "Syntax error:\nExtra content after parameter replication"); + } + return std::vector( + parameter_replication.replicated_at_leaf_buffers().begin(), + parameter_replication.replicated_at_leaf_buffers().end()); +} + StatusOr HloParser::ParseWindowOnly() { lexer_.Lex(); Window window; @@ -3903,6 +3972,11 @@ StatusOr ParseSharding(absl::string_view str) { return parser.ParseShardingOnly(); } +StatusOr> ParseParameterReplication(absl::string_view str) { + HloParser parser(str); + return parser.ParseParameterReplicationOnly(); +} + StatusOr ParseWindow(absl::string_view str) { HloParser parser(str); return parser.ParseWindowOnly(); diff --git a/tensorflow/compiler/xla/service/hlo_parser.h b/tensorflow/compiler/xla/service/hlo_parser.h index 450a54c54c..a96260b4d7 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.h 
+++ b/tensorflow/compiler/xla/service/hlo_parser.h @@ -44,11 +44,16 @@ Status ParseHloString(absl::string_view str, HloModule* module); // creates a HloModule with default config. StatusOr> ParseHloString(absl::string_view str); -// ParseHloString sharding from str. str is supposed to contain the body of the -// sharding, i.e. just the rhs of the "sharding={...}" attribute string, -// e.g., "{replicated}". +// Parses sharding from str. str is supposed to contain the body of the +// sharding, i.e. just the rhs of the "sharding={...}" attribute string, e.g., +// "{replicated}". StatusOr ParseSharding(absl::string_view str); +// Parses parameter replication from str. str is supposed to contain the body of +// the parameter replication, i.e. just the rhs of the +// "parameter_replication={...}" attribute string, e.g., "{true, false}". +StatusOr> ParseParameterReplication(absl::string_view str); + // Parses the result of window_util::ToString(const Window&). StatusOr ParseWindow(absl::string_view str); diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 4b9453cfd7..8e3f1e44b9 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -63,6 +63,19 @@ ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { ROOT %add = f32[2,4]{1,0} add(f32[2,4]{1,0} %multiply, f32[2,4]{1,0} %y) } +)" +}, +// parameter replication +{ +"ParamReplication", +R"(HloModule param_replication_module + +ENTRY %param_replication (a: f32[], b: (f32[2,4], (f32[2,4]))) -> (f32[], (f32[2,4], (f32[2,4]))) { + %a = f32[] parameter(0), parameter_replication={true} + %b = (f32[2,4]{1,0}, (f32[2,4]{1,0})) parameter(1), parameter_replication={false,true} + ROOT %tuple = (f32[], (f32[2,4]{1,0}, (f32[2,4]{1,0}))) tuple(f32[] %a, (f32[2,4]{1,0}, (f32[2,4]{1,0})) %b) +} + )" }, // pred constant @@ -2710,5 +2723,16 @@ TEST_F(HloParserTest, NegativeParameterNumber) { 
::testing::HasSubstr("parameter number must be >= 0")); } +TEST_F(HloParserTest, WrongNumberOfParameterLeafBuffersInReplication) { + const string hlo_string = + "par0 = (f32[3,5], f32[]) parameter(0), " + "parameter_replication={true,false,true}"; + auto result = ParseHloString(hlo_string); + ASSERT_FALSE(result.status().ok()); + EXPECT_THAT(result.status().error_message(), + ::testing::HasSubstr("parameter has 2 leaf buffers, but " + "parameter_replication has 3 elements")); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 4e127356a9..226299a718 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -624,3 +624,15 @@ message PrecisionConfig { // Next: 2 } + +// Describes whether all data-parallelism replicas will receive the same +// parameter data at each buffer. +message ParameterReplication { + // A list of boolean values for the flattened leaf buffers. Each value + // indicates whether the corresponding leaf buffer is replicated. + // + // If this field is empty, it means no buffer is replicated. Otherwise, the + // number of elements in this field must match the number of leaf buffers in + // the HLO instruction's shape. + repeated bool replicated_at_leaf_buffers = 1; +} -- GitLab From 5f54f3d499bfb8d191ce426bf1325b9e5b564b06 Mon Sep 17 00:00:00 2001 From: James Ring Date: Thu, 14 Feb 2019 10:56:16 -0800 Subject: [PATCH 148/351] Introduce key method in PtrOpKernelFactory to avoid weak vtable This change reduces the number of instances of PtrOpKernelFactory vtable that the linker has to dedupe. 
PiperOrigin-RevId: 233988633 --- tensorflow/core/framework/op_kernel.cc | 5 +++++ tensorflow/core/framework/op_kernel.h | 20 +++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index 789f0fda75..c0b81e8538 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -1074,6 +1074,11 @@ void OpKernelRegistrar::InitInternal(const KernelDef* kernel_def, delete kernel_def; } +OpKernel* OpKernelRegistrar::PtrOpKernelFactory::Create( + OpKernelConstruction* context) { + return (*create_func_)(context); +} + } // namespace kernel_factory namespace { diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index f128b40724..ff0b44650c 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -1467,23 +1467,21 @@ class OpKernelRegistrar { // Perform the check in the header to allow compile-time optimization // to a no-op, allowing the linker to remove the kernel symbols. 
if (kernel_def != nullptr) { - struct PtrOpKernelFactory : public OpKernelFactory { - explicit PtrOpKernelFactory( - OpKernel* (*create_func)(OpKernelConstruction*)) - : create_func_(create_func) {} - - OpKernel* Create(OpKernelConstruction* context) override { - return (*create_func_)(context); - } - - OpKernel* (*create_func_)(OpKernelConstruction*); - }; InitInternal(kernel_def, kernel_class_name, absl::make_unique(create_fn)); } } private: + struct PtrOpKernelFactory : public OpKernelFactory { + explicit PtrOpKernelFactory(OpKernel* (*create_func)(OpKernelConstruction*)) + : create_func_(create_func) {} + + OpKernel* Create(OpKernelConstruction* context) override; + + OpKernel* (*create_func_)(OpKernelConstruction*); + }; + void InitInternal(const KernelDef* kernel_def, StringPiece kernel_class_name, std::unique_ptr factory); }; -- GitLab From 5477030ad9ae3c7114c4283367be3c9781ea2433 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 11:12:29 -0800 Subject: [PATCH 149/351] Adding root permission for pip versions that need to be run as root. PiperOrigin-RevId: 233992266 --- tensorflow/tools/ci_build/builds/pip_new.sh | 41 +++++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/pip_new.sh b/tensorflow/tools/ci_build/builds/pip_new.sh index cb3853c5df..079ba90043 100755 --- a/tensorflow/tools/ci_build/builds/pip_new.sh +++ b/tensorflow/tools/ci_build/builds/pip_new.sh @@ -439,16 +439,31 @@ install_tensorflow_pip() { TF_WHEEL_PATH="${1}" + # Set path to pip. + PIP_BIN_PATH="$(which pip${PYTHON_VER_CFG})" + + # Store the original values for the global vars. + PYTHON_BIN_PATH_TMP=${PYTHON_BIN_PATH} + PIP_BIN_PATH_TMP=${PIP_BIN_PATH} + + # If in virtualenv, use default python and pip set up for the venv. 
+ IN_VENV=$(python -c 'import sys; print("1" if hasattr(sys, "real_prefix") else "0")') + if [[ $IN_VENV == "1" ]]; then + PYTHON_BIN_PATH=$(which python) + PIP_BIN_PATH=$(which pip) + fi + + # Print python and pip bin paths + echo "PYTHON_BIN_PATH to be used to install the .whl: ${PYTHON_BIN_PATH}" + echo "PIP_BIN_PATH to be used to install the .whl: ${PIP_BIN_PATH}" + # Upgrade pip so it supports tags such as cp27mu, manylinux1 etc. echo "Upgrade pip in virtualenv" # NOTE: pip install --upgrade pip leads to a documented TLS issue for # some versions in python - curl https://bootstrap.pypa.io/get-pip.py | ${PYTHON_BIN_PATH} - - # Configure matching pip version with python. - PIP_BIN_PATH="$(which pip${PYTHON_VER_CFG})" - echo "PIP_BIN_PATH: ${PIP_BIN_PATH}" + curl https://bootstrap.pypa.io/get-pip.py | ${PYTHON_BIN_PATH} || \ + die "Error: pip install (get-pip.py) FAILED" # Check that requested python version matches configured one. check_python_pip_version @@ -457,7 +472,8 @@ install_tensorflow_pip() { # WHL_PATH, which pulls in absl-py, which uses install_requires notation # introduced in setuptools >=20.5. The default version of setuptools is 5.5.1, # which is too old for absl-py. - ${PIP_BIN_PATH} install --upgrade setuptools==39.1.0 + ${PIP_BIN_PATH} install --upgrade setuptools==39.1.0 || \ + die "Error: setuptools install, upgrade FAILED" # Force tensorflow reinstallation. Otherwise it may not get installed from # last build if it had the same version number as previous build. @@ -470,7 +486,18 @@ install_tensorflow_pip() { # WHL_PATH, which ends up upgrading to the latest version of setuptools. # Versions of setuptools >= 39.1.0 will cause tests to fail like this: # ImportError: cannot import name py31compat - ${PIP_BIN_PATH} install --upgrade setuptools==39.1.0 + ${PIP_BIN_PATH} install --upgrade setuptools==39.1.0 || \ + die "Error: setuptools install, upgrade FAILED" + + # Set python and pip bin paths to original. 
+ if [[ $IN_VENV == "1" ]]; then + PYTHON_BIN_PATH=${PYTHON_BIN_PATH_TMP} + PIP_BIN_PATH=${PIP_BIN_PATH_TMP} + fi + + # Print the outgoing python and pip bin paths. + echo "PYTHON_BIN_PATH: ${PYTHON_BIN_PATH}" + echo "PIP_BIN_PATH: ${PIP_BIN_PATH}" } run_test_with_bazel() { -- GitLab From 53663a13748a1c360702f35d1e18856a1fca66b5 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Thu, 14 Feb 2019 11:23:23 -0800 Subject: [PATCH 150/351] [tf.data] Evaluate `max_intro_op_parallelism` threading option before `private_threadpool_size` option so that it applies to the private threadpool if both options are set. PiperOrigin-RevId: 233994494 --- tensorflow/python/data/ops/dataset_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 4af37d401b..6abf63016f 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -174,12 +174,12 @@ class DatasetV2(object): options = self.options() if options.experimental_threading is not None: t_options = options.experimental_threading - if t_options.private_threadpool_size is not None: - dataset = _PrivateThreadPoolDataset(dataset, - t_options.private_threadpool_size) if t_options.max_intra_op_parallelism is not None: dataset = _MaxIntraOpParallelismDataset( dataset, t_options.max_intra_op_parallelism) + if t_options.private_threadpool_size is not None: + dataset = _PrivateThreadPoolDataset(dataset, + t_options.private_threadpool_size) static_optimizations = options._static_optimizations() # pylint: disable=protected-access if static_optimizations: if self._has_captured_ref(): -- GitLab From b8031d1f944927f6c7033877d3253b49b396e07a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 11:28:44 -0800 Subject: [PATCH 151/351] Improve error messages when undefined variables would be passed to TensorFlow API calls. 
PiperOrigin-RevId: 233995490 --- .../autograph/converters/control_flow.py | 7 ++- .../python/autograph/operators/__init__.py | 3 +- .../autograph/operators/control_flow.py | 60 ++++++++++++++++++- .../autograph/operators/special_values.py | 46 +++++++++++++- .../operators/special_values_test.py | 10 +++- 5 files changed, 118 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/autograph/converters/control_flow.py b/tensorflow/python/autograph/converters/control_flow.py index aa7bf4666c..90432e808f 100644 --- a/tensorflow/python/autograph/converters/control_flow.py +++ b/tensorflow/python/autograph/converters/control_flow.py @@ -260,9 +260,12 @@ class ControlFlowTransformer(converter.Base): assignments = [] for s in undefined_symbols: template = ''' - var = ag__.UNDEFINED + var = ag__.Undefined(symbol_name) ''' - assignments += templates.replace(template, var=s) + assignments += templates.replace( + template, + var=s, + symbol_name=gast.Str(s.ssf())) return assignments def _get_loop_state(self, node): diff --git a/tensorflow/python/autograph/operators/__init__.py b/tensorflow/python/autograph/operators/__init__.py index 58ed72b700..5b3f45de05 100644 --- a/tensorflow/python/autograph/operators/__init__.py +++ b/tensorflow/python/autograph/operators/__init__.py @@ -71,4 +71,5 @@ from tensorflow.python.autograph.operators.py_builtins import range_ from tensorflow.python.autograph.operators.slices import get_item from tensorflow.python.autograph.operators.slices import GetItemOpts from tensorflow.python.autograph.operators.slices import set_item -from tensorflow.python.autograph.operators.special_values import UNDEFINED +from tensorflow.python.autograph.operators.special_values import is_undefined +from tensorflow.python.autograph.operators.special_values import Undefined diff --git a/tensorflow/python/autograph/operators/control_flow.py b/tensorflow/python/autograph/operators/control_flow.py index 707064cefa..414d53378f 100644 --- 
a/tensorflow/python/autograph/operators/control_flow.py +++ b/tensorflow/python/autograph/operators/control_flow.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from tensorflow.python.autograph.operators import py_builtins +from tensorflow.python.autograph.operators import special_values from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import control_flow_ops @@ -62,6 +63,17 @@ def for_stmt(iter_, extra_test, body, init_state): if tensor_util.is_tensor(iter_): return _known_len_for_stmt(iter_, extra_test, body, init_state) elif isinstance(iter_, dataset_ops.DatasetV2): + # Check for undefined symbols and report an error. This prevents the error + # from propagating into the TF runtime. We have more information here and + # can provide a clearer error message. + undefined_symbols = _filter_undefined(init_state) + + if undefined_symbols: + raise ValueError( + 'TensorFlow requires that the following symbols must be initialized ' + 'to a Tensor, Variable or TensorArray before the loop: {}' + .format(tuple(undefined_symbols))) + return _dataset_for_stmt(iter_, extra_test, body, init_state) else: return _py_for_stmt(iter_, extra_test, body, init_state) @@ -154,11 +166,31 @@ def while_stmt(test, body, init_state, extra_deps, opts=None): # That could be something as simple as a collection of dispatch rules, with # some prioritization. if any(tensor_util.is_tensor(v) for v in nest.flatten(extra_deps)): + # Check for undefined symbols and report an error. This prevents the error + # from propagating into the TF runtime. We have more information here and + # can provide a clearer error message. 
+ undefined_symbols = _filter_undefined(init_state) + + if undefined_symbols: + raise ValueError( + 'TensorFlow requires that the following symbols must be initialized ' + 'to a Tensor, Variable or TensorArray before the loop: {}' + .format(tuple(undefined_symbols))) return _tf_while_stmt(test, body, init_state, opts) else: return _py_while_stmt(test, body, init_state, opts) +def _filter_undefined(all_symbols): + """Returns the names of undefined symbols contained in all_symbols.""" + undefined_symbols = [ + s.symbol_name + for s in all_symbols + if special_values.is_undefined(s) + ] + return undefined_symbols + + def _tf_while_stmt(test, body, init_state, opts): """Overload of while_stmt that stages a TF while_stmt.""" if opts is None: @@ -202,7 +234,33 @@ def if_stmt(cond, body, orelse): def tf_if_stmt(cond, body, orelse): """Overload of if_stmt that stages a TF cond.""" - return control_flow_ops.cond(cond, body, orelse) + protected_body = _wrap_in_protection_from_undefined(body, branch_name='if') + protected_orelse = _wrap_in_protection_from_undefined(orelse, + branch_name='else') + + return control_flow_ops.cond(cond, protected_body, protected_orelse) + + +def _wrap_in_protection_from_undefined(func, branch_name): + """Wraps function to raise useful error when it returns undefined symbols.""" + def protected_func(): + """Calls function and raises an error if undefined symbols are returned.""" + results = func() + undefined_symbols = None + if isinstance(results, tuple): + undefined_symbols = _filter_undefined(results) + elif special_values.is_undefined(results): + # Single return value + undefined_symbols = results.symbol_name + + if undefined_symbols: + message = ('The following symbols must also be initialized in the %s ' + 'branch: {}. 
Alternatively, you may initialize them before ' + 'the if statement.') % branch_name + message = message.format(undefined_symbols) + raise ValueError(message) + return results + return protected_func def _py_if_stmt(cond, body, orelse): diff --git a/tensorflow/python/autograph/operators/special_values.py b/tensorflow/python/autograph/operators/special_values.py index 08a1013524..4c1b3d1f30 100644 --- a/tensorflow/python/autograph/operators/special_values.py +++ b/tensorflow/python/autograph/operators/special_values.py @@ -19,5 +19,47 @@ from __future__ import division from __future__ import print_function -# Used to reify undefined Python symbols so they can be used during staging. -UNDEFINED = object() +class Undefined(object): + """Represents an undefined symbol in Python. + + This is used to reify undefined symbols, which is required to use the + functional form of loops. + Example: + + while n > 0: + n = n - 1 + s = n + return s # Runtime error if n == 0 + + This is valid Python code and will not result in an error as long as n + is positive. The use of this class is to stay as close to Python semantics + as possible for staged code of this nature. + + Converted version of the above showing the possible usage of this class: + + s = Undefined('s') + init_state = (s,) + s = while_loop(cond, body, init_state) + return s # s is an instance of Undefined if the loop never runs + + Attributes: + symbol_name: Text, identifier for the undefined symbol + """ + + def __init__(self, symbol_name): + self.symbol_name = symbol_name + + +def is_undefined(value): + """Checks whether Autograph has determined that a given value is undefined. + + This only works in places where Autograph reifies undefined symbols. Note that + if this function is passed a truly undefined symbol the call-site will raise + NameError. + + Args: + value: value to test for undefinedness + Returns: + Boolean, whether the input value is undefined. 
+ """ + return isinstance(value, Undefined) diff --git a/tensorflow/python/autograph/operators/special_values_test.py b/tensorflow/python/autograph/operators/special_values_test.py index 45fcf45067..2e1e087a9f 100644 --- a/tensorflow/python/autograph/operators/special_values_test.py +++ b/tensorflow/python/autograph/operators/special_values_test.py @@ -22,11 +22,17 @@ from tensorflow.python.autograph.operators import special_values from tensorflow.python.platform import test -class PythonLangUtilsTest(test.TestCase): +class SpecialValuesTest(test.TestCase): def test_undefined(self): - self.assertIs(special_values.UNDEFINED, special_values.UNDEFINED) + undefined_symbol = special_values.Undefined('name') + self.assertEqual(undefined_symbol.symbol_name, 'name') + undefined_symbol2 = special_values.Undefined('name') + self.assertNotEqual(undefined_symbol, undefined_symbol2) + + self.assertTrue(special_values.is_undefined(undefined_symbol)) + self.assertTrue(special_values.is_undefined(undefined_symbol2)) if __name__ == '__main__': test.main() -- GitLab From d7c49ea30c10b61d1868eff16634963e9ff34a32 Mon Sep 17 00:00:00 2001 From: Katherine Wu Date: Thu, 14 Feb 2019 12:06:30 -0800 Subject: [PATCH 152/351] Fix bug that caused export_outputs_for_mode to create the wrong ExportOutput. 
PiperOrigin-RevId: 234003026 --- .../saved_model/model_utils/export_test.py | 48 +++++++++++++++++++ .../saved_model/model_utils/export_utils.py | 2 +- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/saved_model/model_utils/export_test.py b/tensorflow/python/saved_model/model_utils/export_test.py index df9769f809..c87d2ee6ae 100644 --- a/tensorflow/python/saved_model/model_utils/export_test.py +++ b/tensorflow/python/saved_model/model_utils/export_test.py @@ -31,6 +31,7 @@ from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils from tensorflow.python.saved_model.model_utils import export_output from tensorflow.python.saved_model.model_utils import export_utils +from tensorflow.python.saved_model.model_utils.mode_keys import KerasModeKeys class ExportTest(test_util.TensorFlowTestCase): @@ -235,6 +236,53 @@ class ExportTest(test_util.TensorFlowTestCase): self.assertDictEqual(expected_signature_defs, signature_defs) + @test_util.deprecated_graph_mode_only + def test_export_outputs_for_mode(self): + predictions = {"predictions": constant_op.constant([1.])} + loss = {"loss": constant_op.constant([2.])} + metrics = { + "metrics": (constant_op.constant([3.]), constant_op.constant([4.]))} + expected_metrics = { + "metrics/value": metrics["metrics"][0], + "metrics/update_op": metrics["metrics"][1] + } + + def _build_export_output(mode): + return export_utils.export_outputs_for_mode( + mode, None, predictions, loss, metrics) + + ret = _build_export_output(KerasModeKeys.TRAIN) + self.assertIn(signature_constants.DEFAULT_TRAIN_SIGNATURE_DEF_KEY, ret) + export_out = ret[signature_constants.DEFAULT_TRAIN_SIGNATURE_DEF_KEY] + self.assertIsInstance(export_out, export_output.TrainOutput) + self.assertEqual(export_out.predictions, predictions) + self.assertEqual(export_out.loss, loss) + self.assertEqual(export_out.metrics, expected_metrics) + + ret = 
_build_export_output(KerasModeKeys.TEST) + self.assertIn(signature_constants.DEFAULT_EVAL_SIGNATURE_DEF_KEY, ret) + export_out = ret[signature_constants.DEFAULT_EVAL_SIGNATURE_DEF_KEY] + self.assertIsInstance(export_out, export_output.EvalOutput) + self.assertEqual(export_out.predictions, predictions) + self.assertEqual(export_out.loss, loss) + self.assertEqual(export_out.metrics, expected_metrics) + + ret = _build_export_output(KerasModeKeys.PREDICT) + self.assertIn(signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY, ret) + export_out = ret[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] + self.assertIsInstance(export_out, export_output.PredictOutput) + self.assertEqual(export_out.outputs, predictions) + + classes = constant_op.constant(["class5"]) + ret = export_utils.export_outputs_for_mode( + KerasModeKeys.PREDICT, + {"classify": export_output.ClassificationOutput( + classes=classes)}) + self.assertIn("classify", ret) + export_out = ret["classify"] + self.assertIsInstance(export_out, export_output.ClassificationOutput) + self.assertEqual(export_out.classes, classes) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/saved_model/model_utils/export_utils.py b/tensorflow/python/saved_model/model_utils/export_utils.py index 431b6dc345..e9f6f894c2 100644 --- a/tensorflow/python/saved_model/model_utils/export_utils.py +++ b/tensorflow/python/saved_model/model_utils/export_utils.py @@ -280,7 +280,7 @@ def export_outputs_for_mode( signature_key = SIGNATURE_KEY_MAP[mode] if mode_keys.is_predict(mode): return get_export_outputs(serving_export_outputs, predictions) - elif mode_keys.is_eval(mode): + elif mode_keys.is_train(mode): return {signature_key: export_output_lib.TrainOutput( loss=loss, predictions=predictions, metrics=metrics)} else: -- GitLab From 9b542ce7ef0cb6870da4622c1c8951ee83cdb4d5 Mon Sep 17 00:00:00 2001 From: Jian Li Date: Thu, 14 Feb 2019 12:44:19 -0800 Subject: [PATCH 153/351] Create int8 tanh. 
PiperOrigin-RevId: 234009714 --- tensorflow/lite/kernels/activations.cc | 13 +++- tensorflow/lite/kernels/activations_test.cc | 25 +++++++- tensorflow/lite/kernels/internal/BUILD | 1 + .../internal/reference/integer_ops/tanh.h | 63 +++++++++++++++++++ tensorflow/lite/kernels/register.cc | 3 +- tensorflow/lite/toco/tflite/operator.cc | 17 ++++- tensorflow/lite/toco/tflite/operator_test.cc | 4 ++ 7 files changed, 121 insertions(+), 5 deletions(-) create mode 100644 tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h diff --git a/tensorflow/lite/kernels/activations.cc b/tensorflow/lite/kernels/activations.cc index c0a34957df..0066d18994 100644 --- a/tensorflow/lite/kernels/activations.cc +++ b/tensorflow/lite/kernels/activations.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/quantization_util.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h" #include "tensorflow/lite/kernels/internal/reference/integer_ops/softmax.h" +#include "tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h" #include "tensorflow/lite/kernels/internal/reference/reference_ops.h" #include "tensorflow/lite/kernels/internal/tensor.h" #include "tensorflow/lite/kernels/kernel_util.h" @@ -119,7 +120,7 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* output = GetOutput(context, node, 0); TF_LITE_ENSURE_EQ(context, input->type, output->type); - if (input->type == kTfLiteUInt8) { + if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) { static constexpr int kInputIntegerBits = 4; const double input_real_multiplier = @@ -455,6 +456,16 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { } return kTfLiteOk; } break; + case kTfLiteInt8: { + const auto input_shape = GetTensorShape(input); + const auto output_shape = GetTensorShape(output); + const int size = MatchingFlatSize(input_shape, output_shape); + reference_integer_ops::Tanh( + 
input->params.zero_point, data->input_range_radius, + data->input_multiplier, data->input_left_shift, size, + GetTensorData(input), GetTensorData(output)); + return kTfLiteOk; + } break; default: context->ReportError(context, "Only float32 supported currently, got %s.", TfLiteTypeGetName(input->type)); diff --git a/tensorflow/lite/kernels/activations_test.cc b/tensorflow/lite/kernels/activations_test.cc index e30e62d75a..f005dac438 100644 --- a/tensorflow/lite/kernels/activations_test.cc +++ b/tensorflow/lite/kernels/activations_test.cc @@ -219,7 +219,7 @@ TEST(QuantizedActivationsOpTest, Relu6Int8) { ElementsAreArray({0, 0, 32, 64, 48, 0, 96, 16})); } -TEST(QuantizedActivationsOpTest, Tanh) { +TEST(QuantizedActivationsOpTest, TanhUint8) { const float kMin = -1; const float kMax = 127.f / 128.f; QuantizedActivationsOpModel m( @@ -242,6 +242,29 @@ TEST(QuantizedActivationsOpTest, Tanh) { ElementsAreArray({128, 0, 251, 255, 0, 5, 255, 225})); } +TEST(QuantizedActivationsOpTest, TanhInt8) { + const float kMin = -1; + const float kMax = 127.f / 128.f; + QuantizedActivationsOpModel m( + BuiltinOperator_TANH, + /*input=*/{TensorType_INT8, {1, 2, 4, 1}, 8 * kMin, 8 * kMax}, + /*output=*/{TensorType_INT8, {1, 2, 4, 1}, kMin, kMax}); + m.SetInput({ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + 0.0, -0.999987, 0.964027, 0.999329, // + -0.999329, -0.96402, 0.99999, 0.76159, // + }, + kQuantizedTolerance))); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({0, -128, 123, 127, -128, -123, 127, 97})); +} + TEST(QuantizedActivationsOpTest, TanhInt16) { const float kMin = -1; const float kMax = 32767.f / 32768.f; diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index d4f1f53a77..37a99bcc0c 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -313,6 +313,7 @@ cc_library( 
"reference/integer_ops/logistic.h", "reference/integer_ops/pooling.h", "reference/integer_ops/softmax.h", + "reference/integer_ops/tanh.h", "reference/reference_ops.h", "reference/softmax.h", ], diff --git a/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h b/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h new file mode 100644 index 0000000000..081928bc88 --- /dev/null +++ b/tensorflow/lite/kernels/internal/reference/integer_ops/tanh.h @@ -0,0 +1,63 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_ + +#include +#include "tensorflow/lite/kernels/internal/common.h" + +namespace tflite { +namespace reference_integer_ops { + +inline void Tanh(int32_t input_zero_point, int32_t input_range_radius, + int32_t input_multiplier, int32_t input_shift, + int32_t input_size, const int8_t* input_data, + int8_t* output_data) { + // Integer bits must be in sync with Prepare() function. 
+ static constexpr int32_t kInputIntegerBits = 4; + static constexpr int32_t kOutputScale = 7; + static constexpr int8_t kMinInt8 = std::numeric_limits::min(); + static constexpr int8_t kMaxInt8 = std::numeric_limits::max(); + using F4 = gemmlowp::FixedPoint; + + for (int i = 0; i < input_size; ++i) { + const int32_t input = + static_cast(input_data[i]) - input_zero_point; + if (input <= -input_range_radius) { + output_data[i] = kMinInt8; + } else if (input >= input_range_radius) { + output_data[i] = kMaxInt8; + } else { + const int32_t input_in_q4 = + MultiplyByQuantizedMultiplier(input, input_multiplier, input_shift); + const int32_t output_in_q0 = + gemmlowp::tanh(F4::FromRaw(input_in_q4)).raw(); + + // Rescale and downcast. + using gemmlowp::RoundingDivideByPOT; + int32_t output_in_q24 = + RoundingDivideByPOT(output_in_q0, 31 - kOutputScale); + output_in_q24 = + std::min(std::max(output_in_q24, static_cast(kMinInt8)), + static_cast(kMaxInt8)); + output_data[i] = static_cast(output_in_q24); + } + } +} + +} // namespace reference_integer_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_TANH_H_ diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index d445129d0c..58f0dbe9e4 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -168,7 +168,8 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_RELU_N1_TO_1, Register_RELU_N1_TO_1()); AddBuiltin(BuiltinOperator_RELU6, Register_RELU6(), /* min_version */ 1, /* max_version */ 2); - AddBuiltin(BuiltinOperator_TANH, Register_TANH()); + AddBuiltin(BuiltinOperator_TANH, Register_TANH(), /* min_version */ 1, + /* max_version */ 2); AddBuiltin(BuiltinOperator_LOGISTIC, Register_LOGISTIC(), /* min_version */ 1, /* max_version */ 2); diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index f22f7ff10b..e0cc0d7523 100644 --- 
a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -1678,6 +1678,20 @@ class Slice : public SimpleOperator { } }; +class Tanh : public SimpleOperator { + public: + explicit Tanh() : SimpleOperator("TANH", OperatorType::kTanh) {} + int GetVersion(const OperatorSignature& op_signature) const override { + const string& input_name = op_signature.op->inputs[0]; + const Array& input_array = op_signature.model->GetArray(input_name); + // Version 2 supports signed int8 input types. + if (input_array.data_type == ArrayDataType::kInt8) { + return 2; + } + return 1; + } +}; + class OneHot : public BuiltinOperator { public: @@ -2386,8 +2400,7 @@ std::vector> BuildOperatorList( ops.push_back( MakeUnique>("PRELU", OperatorType::kPRelu)); ops.push_back(MakeUnique()); - ops.push_back( - MakeUnique>("TANH", OperatorType::kTanh)); + ops.push_back(MakeUnique()); ops.push_back( MakeUnique>("EXP", OperatorType::kExp)); ops.push_back( diff --git a/tensorflow/lite/toco/tflite/operator_test.cc b/tensorflow/lite/toco/tflite/operator_test.cc index 62d3997a91..34faa1dce7 100644 --- a/tensorflow/lite/toco/tflite/operator_test.cc +++ b/tensorflow/lite/toco/tflite/operator_test.cc @@ -820,6 +820,10 @@ TEST_F(OperatorTest, VersioningBatchToSpaceNDTest) { SimpleVersioningTest(); } +TEST_F(OperatorTest, VersioningTanhTest) { + SimpleVersioningTest(); +} + TEST_F(OperatorTest, VersioningStridedSliceTest) { SimpleVersioningTest(); } -- GitLab From c314d0cd20752cac7707c4c278e7d9d9887a2d25 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 14 Feb 2019 12:45:00 -0800 Subject: [PATCH 154/351] Internal change PiperOrigin-RevId: 234009849 --- tensorflow/core/platform/posix/env.cc | 2 +- tensorflow/core/platform/posix/port.cc | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/platform/posix/env.cc b/tensorflow/core/platform/posix/env.cc index b02b1f3f48..f2dff5a9b6 100644 --- a/tensorflow/core/platform/posix/env.cc +++ b/tensorflow/core/platform/posix/env.cc @@ -102,7 +102,7 @@ class PosixEnv : public Env { } bool GetCurrentThreadName(string* name) override { -#ifdef __ANDROID__ +#if defined(__ANDROID__) || defined(__EMSCRIPTEN__) return false; #else char buf[100]; diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index ea6066ac7b..807e008322 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -82,7 +82,9 @@ int NumTotalCPUs() { } int GetCurrentCPU() { -#if defined(__linux__) && !defined(__ANDROID__) +#if defined(__EMSCRIPTEN__) + return sched_getcpu(); +#elif defined(__linux__) && !defined(__ANDROID__) return sched_getcpu(); // Attempt to use cpuid on all other platforms. If that fails, perform a // syscall. -- GitLab From b51e21629485ab6b6388304f19301cdf9c88502d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 14 Feb 2019 12:51:22 -0800 Subject: [PATCH 155/351] [XLA] Don't crash when trying to simplify batch dots with no contraction dims Dot with no contraction is just a multiply. Not a super useful operation but valid HLO. AlgebraicSimplifier will rewrite it into a multiply, so don't even try simplifying it. 
PiperOrigin-RevId: 234010983 --- .../xla/service/batch_dot_simplification.cc | 7 ++++ .../service/batch_dot_simplification_test.cc | 42 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/tensorflow/compiler/xla/service/batch_dot_simplification.cc b/tensorflow/compiler/xla/service/batch_dot_simplification.cc index eda026ac56..dbabd82dd5 100644 --- a/tensorflow/compiler/xla/service/batch_dot_simplification.cc +++ b/tensorflow/compiler/xla/service/batch_dot_simplification.cc @@ -28,6 +28,13 @@ BatchDotSimplification::ElideDegenerateBatchDimensionFromBatchDot( *rhs = batch_dot->mutable_operand(1); const Shape& lhs_shape = lhs->shape(); + // A dot with no contracting dims will be rewritten into a multiply by + // AlgebraicSimplifier. Dots with multiple contracting dims are currently + // unsupported. + if (dim_numbers.lhs_contracting_dimensions_size() != 1) { + return false; + } + std::vector degenerate_dims; for (int64 batch_dim : dim_numbers.lhs_batch_dimensions()) { if (lhs_shape.dimensions(batch_dim) == 1) { diff --git a/tensorflow/compiler/xla/service/batch_dot_simplification_test.cc b/tensorflow/compiler/xla/service/batch_dot_simplification_test.cc index 52ec1a794c..a81f394a38 100644 --- a/tensorflow/compiler/xla/service/batch_dot_simplification_test.cc +++ b/tensorflow/compiler/xla/service/batch_dot_simplification_test.cc @@ -169,5 +169,47 @@ main { /*lhs_contracting_dim=*/3, /*rhs_contracting_dim=*/2))); } +TEST_F(BatchDotSimplificationTest, + ElideMultipleDegenerateBatchDotDimsNonContracting) { + const char* hlo_text = R"( +HloModule BatchDot + +main { + a = f32[1,101] parameter(0) + b = f32[1,101] parameter(1) + ROOT dot = f32[1,101,101] dot(a,b), lhs_batch_dims={0}, + lhs_contracting_dims={}, + rhs_batch_dims={0}, + rhs_contracting_dims={} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr m, + ParseAndReturnVerifiedModule(hlo_text)); + BatchDotSimplification pass; + ASSERT_FALSE(pass.Run(m.get()).ValueOrDie()); +} + 
+TEST_F(BatchDotSimplificationTest, + ElideMultipleDegenerateBatchDotDimsMultipleContracting) { + const char* hlo_text = R"( +HloModule BatchDot + +main { + lhs = f32[1,5,17,10,13] parameter(0) + rhs = f32[1,9,10,13,6,5] parameter(1) + ROOT dot = f32[10,1,17,9,6] dot(lhs,rhs), lhs_batch_dims={3,0}, + rhs_batch_dims={2,0}, + lhs_contracting_dims={1,4}, + rhs_contracting_dims={5,3} +} +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr m, + ParseAndReturnVerifiedModule(hlo_text)); + BatchDotSimplification pass; + ASSERT_FALSE(pass.Run(m.get()).ValueOrDie()); +} + } // namespace } // namespace xla -- GitLab From 264745232beca4314ae5e2124e62c6bf7fab2be3 Mon Sep 17 00:00:00 2001 From: Katherine Wu Date: Thu, 14 Feb 2019 13:03:58 -0800 Subject: [PATCH 156/351] Deprecate public SignatureDef symbols. The new SavedModel save() API doesn't allow manually defined SignatureDefs. There should be a replacement for creating the standard SignatureDefs. PiperOrigin-RevId: 234013299 --- .../saved_model/signature_def_utils_impl.py | 5 ----- .../golden/v2/tensorflow.saved_model.pbtxt | 20 ------------------- tensorflow/tools/compatibility/renames_v2.py | 17 +++++++++------- 3 files changed, 10 insertions(+), 32 deletions(-) diff --git a/tensorflow/python/saved_model/signature_def_utils_impl.py b/tensorflow/python/saved_model/signature_def_utils_impl.py index f6e6e1d13e..2e0a0afeec 100644 --- a/tensorflow/python/saved_model/signature_def_utils_impl.py +++ b/tensorflow/python/saved_model/signature_def_utils_impl.py @@ -30,7 +30,6 @@ from tensorflow.python.util.tf_export import tf_export @tf_export( - 'saved_model.build_signature_def', v1=[ 'saved_model.build_signature_def', 'saved_model.signature_def_utils.build_signature_def' @@ -63,7 +62,6 @@ def build_signature_def(inputs=None, outputs=None, method_name=None): @tf_export( - 'saved_model.regression_signature_def', v1=[ 'saved_model.regression_signature_def', 'saved_model.signature_def_utils.regression_signature_def' @@ -112,7 +110,6 @@ 
def regression_signature_def(examples, predictions): @tf_export( - 'saved_model.classification_signature_def', v1=[ 'saved_model.classification_signature_def', 'saved_model.signature_def_utils.classification_signature_def' @@ -172,7 +169,6 @@ def classification_signature_def(examples, classes, scores): @tf_export( - 'saved_model.predict_signature_def', v1=[ 'saved_model.predict_signature_def', 'saved_model.signature_def_utils.predict_signature_def' @@ -270,7 +266,6 @@ def _supervised_signature_def( @tf_export( - 'saved_model.is_valid_signature', v1=[ 'saved_model.is_valid_signature', 'saved_model.signature_def_utils.is_valid_signature' diff --git a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt index f6ab7ac0ad..32d7c48be3 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.saved_model.pbtxt @@ -88,30 +88,10 @@ tf_module { name: "VARIABLES_FILENAME" mtype: "" } - member_method { - name: "build_signature_def" - argspec: "args=[\'inputs\', \'outputs\', \'method_name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " - } - member_method { - name: "classification_signature_def" - argspec: "args=[\'examples\', \'classes\', \'scores\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "contains_saved_model" argspec: "args=[\'export_dir\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "is_valid_signature" - argspec: "args=[\'signature_def\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "predict_signature_def" - argspec: "args=[\'inputs\', \'outputs\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "regression_signature_def" - argspec: "args=[\'examples\', \'predictions\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "save" argspec: "args=[\'obj\', \'export_dir\', 
\'signatures\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/compatibility/renames_v2.py b/tensorflow/tools/compatibility/renames_v2.py index 3f8372b51d..b8bdc71cbd 100644 --- a/tensorflow/tools/compatibility/renames_v2.py +++ b/tensorflow/tools/compatibility/renames_v2.py @@ -205,8 +205,6 @@ renames = { 'tf.get_variable': 'tf.compat.v1.get_variable', 'tf.get_variable_scope': 'tf.compat.v1.get_variable_scope', 'tf.gfile.FastGFile': 'tf.compat.v1.gfile.FastGFile', - 'tf.gfile.GFile': 'tf.io.gfile.GFile', - 'tf.gfile.Open': 'tf.io.gfile.GFile', 'tf.global_norm': 'tf.linalg.global_norm', 'tf.global_variables': 'tf.compat.v1.global_variables', 'tf.global_variables_initializer': 'tf.compat.v1.global_variables_initializer', @@ -501,8 +499,10 @@ renames = { 'tf.saved_model.Builder': 'tf.compat.v1.saved_model.Builder', 'tf.saved_model.LEGACY_INIT_OP_KEY': 'tf.compat.v1.saved_model.LEGACY_INIT_OP_KEY', 'tf.saved_model.MAIN_OP_KEY': 'tf.compat.v1.saved_model.MAIN_OP_KEY', + 'tf.saved_model.build_signature_def': 'tf.compat.v1.saved_model.build_signature_def', 'tf.saved_model.build_tensor_info': 'tf.compat.v1.saved_model.build_tensor_info', 'tf.saved_model.builder.SavedModelBuilder': 'tf.compat.v1.saved_model.builder.SavedModelBuilder', + 'tf.saved_model.classification_signature_def': 'tf.compat.v1.saved_model.classification_signature_def', 'tf.saved_model.constants.ASSETS_DIRECTORY': 'tf.saved_model.ASSETS_DIRECTORY', 'tf.saved_model.constants.ASSETS_KEY': 'tf.saved_model.ASSETS_KEY', 'tf.saved_model.constants.LEGACY_INIT_OP_KEY': 'tf.compat.v1.saved_model.constants.LEGACY_INIT_OP_KEY', @@ -514,6 +514,7 @@ renames = { 'tf.saved_model.constants.VARIABLES_FILENAME': 'tf.saved_model.VARIABLES_FILENAME', 'tf.saved_model.experimental.save': 'tf.saved_model.save', 'tf.saved_model.get_tensor_from_tensor_info': 'tf.compat.v1.saved_model.get_tensor_from_tensor_info', + 'tf.saved_model.is_valid_signature': 
'tf.compat.v1.saved_model.is_valid_signature', 'tf.saved_model.load': 'tf.compat.v1.saved_model.load', 'tf.saved_model.loader.load': 'tf.compat.v1.saved_model.loader.load', 'tf.saved_model.loader.maybe_saved_model_directory': 'tf.compat.v1.saved_model.loader.maybe_saved_model_directory', @@ -521,6 +522,8 @@ renames = { 'tf.saved_model.main_op.main_op_with_restore': 'tf.compat.v1.saved_model.main_op.main_op_with_restore', 'tf.saved_model.main_op_with_restore': 'tf.compat.v1.saved_model.main_op_with_restore', 'tf.saved_model.maybe_saved_model_directory': 'tf.compat.v1.saved_model.maybe_saved_model_directory', + 'tf.saved_model.predict_signature_def': 'tf.compat.v1.saved_model.predict_signature_def', + 'tf.saved_model.regression_signature_def': 'tf.compat.v1.saved_model.regression_signature_def', 'tf.saved_model.signature_constants.CLASSIFY_INPUTS': 'tf.saved_model.CLASSIFY_INPUTS', 'tf.saved_model.signature_constants.CLASSIFY_METHOD_NAME': 'tf.saved_model.CLASSIFY_METHOD_NAME', 'tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES': 'tf.saved_model.CLASSIFY_OUTPUT_CLASSES', @@ -532,11 +535,11 @@ renames = { 'tf.saved_model.signature_constants.REGRESS_INPUTS': 'tf.saved_model.REGRESS_INPUTS', 'tf.saved_model.signature_constants.REGRESS_METHOD_NAME': 'tf.saved_model.REGRESS_METHOD_NAME', 'tf.saved_model.signature_constants.REGRESS_OUTPUTS': 'tf.saved_model.REGRESS_OUTPUTS', - 'tf.saved_model.signature_def_utils.build_signature_def': 'tf.saved_model.build_signature_def', - 'tf.saved_model.signature_def_utils.classification_signature_def': 'tf.saved_model.classification_signature_def', - 'tf.saved_model.signature_def_utils.is_valid_signature': 'tf.saved_model.is_valid_signature', - 'tf.saved_model.signature_def_utils.predict_signature_def': 'tf.saved_model.predict_signature_def', - 'tf.saved_model.signature_def_utils.regression_signature_def': 'tf.saved_model.regression_signature_def', + 'tf.saved_model.signature_def_utils.build_signature_def': 
'tf.compat.v1.saved_model.signature_def_utils.build_signature_def', + 'tf.saved_model.signature_def_utils.classification_signature_def': 'tf.compat.v1.saved_model.signature_def_utils.classification_signature_def', + 'tf.saved_model.signature_def_utils.is_valid_signature': 'tf.compat.v1.saved_model.signature_def_utils.is_valid_signature', + 'tf.saved_model.signature_def_utils.predict_signature_def': 'tf.compat.v1.saved_model.signature_def_utils.predict_signature_def', + 'tf.saved_model.signature_def_utils.regression_signature_def': 'tf.compat.v1.saved_model.signature_def_utils.regression_signature_def', 'tf.saved_model.simple_save': 'tf.compat.v1.saved_model.simple_save', 'tf.saved_model.tag_constants.GPU': 'tf.saved_model.GPU', 'tf.saved_model.tag_constants.SERVING': 'tf.saved_model.SERVING', -- GitLab From 5b90573d414a7ef41b733b6dc4f8e457c62be982 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Thu, 14 Feb 2019 13:36:50 -0800 Subject: [PATCH 157/351] tf.identity should use backing_device so that the output tensor has memory backed on the correct device. PiperOrigin-RevId: 234019671 --- tensorflow/python/ops/array_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 977467d222..8db23c467a 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -69,7 +69,7 @@ def identity(input, name=None): # pylint: disable=redefined-builtin """ if context.executing_eagerly() and not hasattr(input, "graph"): input = ops.convert_to_tensor(input) - in_device = input.device + in_device = input.backing_device # TODO(ashankar): Does 'identity' need to invoke execution callbacks? context_device = context.context().device_name if not context_device: -- GitLab From e7d9786e66c13eb978c418527746ca3d1e11fe95 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 14 Feb 2019 13:49:09 -0800 Subject: [PATCH 158/351] Update Fingerprint64Map to use aliases PiperOrigin-RevId: 234022159 --- tensorflow/core/framework/resource_mgr.h | 45 ++- tensorflow/core/kernels/lookup_tables/BUILD | 14 +- .../lookup_tables/fingerprint64_map_ops.cc | 114 +++--- .../lookup_tables/lookup_table_interface.h | 119 +++--- .../kernels/lookup_tables/table_op_utils.h | 378 +++++++----------- .../lookup_tables/table_resource_utils.h | 87 ---- 6 files changed, 279 insertions(+), 478 deletions(-) delete mode 100644 tensorflow/core/kernels/lookup_tables/table_resource_utils.h diff --git a/tensorflow/core/framework/resource_mgr.h b/tensorflow/core/framework/resource_mgr.h index 18a21d744b..da547d5829 100644 --- a/tensorflow/core/framework/resource_mgr.h +++ b/tensorflow/core/framework/resource_mgr.h @@ -132,14 +132,14 @@ class ResourceMgr { // // REQUIRES: std::is_base_of // REQUIRES: resource != nullptr - template + template Status Lookup(const string& container, const string& name, T** resource) const TF_MUST_USE_RESULT; // Similar to Lookup, but looks up multiple resources at once, with only a // single lock acquisition. If containers_and_names[i] is uninitialized // then this function does not modify resources[i]. 
- template + template Status LookupMany(absl::Span const> containers_and_names, std::vector>* @@ -155,7 +155,7 @@ class ResourceMgr { // // REQUIRES: std::is_base_of // REQUIRES: resource != nullptr - template + template Status LookupOrCreate(const string& container, const string& name, T** resource, std::function creator) TF_MUST_USE_RESULT; @@ -196,7 +196,7 @@ class ResourceMgr { mutable mutex mu_; std::unordered_map containers_ GUARDED_BY(mu_); - template + template Status LookupInternal(const string& container, const string& name, T** resource) const SHARED_LOCKS_REQUIRED(mu_) TF_MUST_USE_RESULT; @@ -267,7 +267,7 @@ Status CreateResource(OpKernelContext* ctx, const ResourceHandle& p, T* value); // // If the lookup is successful, the caller takes the ownership of one ref on // `*value`, and must call its `Unref()` method when it has finished using it. -template +template Status LookupResource(OpKernelContext* ctx, const ResourceHandle& p, T** value); // Looks up multiple resources pointed by a sequence of resource handles. 
If @@ -437,15 +437,15 @@ Status ResourceMgr::Create(const string& container, const string& name, return DoCreate(container, MakeTypeIndex(), name, resource); } -template +template Status ResourceMgr::Lookup(const string& container, const string& name, T** resource) const { CheckDeriveFromResourceBase(); tf_shared_lock l(mu_); - return LookupInternal(container, name, resource); + return LookupInternal(container, name, resource); } -template +template Status ResourceMgr::LookupMany( absl::Span const> containers_and_names, @@ -455,8 +455,9 @@ Status ResourceMgr::LookupMany( resources->resize(containers_and_names.size()); for (size_t i = 0; i < containers_and_names.size(); ++i) { T* resource; - Status s = LookupInternal(*containers_and_names[i].first, - *containers_and_names[i].second, &resource); + Status s = LookupInternal( + *containers_and_names[i].first, *containers_and_names[i].second, + &resource); if (s.ok()) { (*resources)[i].reset(resource); } @@ -464,7 +465,18 @@ Status ResourceMgr::LookupMany( return Status::OK(); } +// Simple wrapper to allow conditional dynamic / static casts. +template +struct TypeCastFunctor { + static T* Cast(ResourceBase* r) { return static_cast(r); } +}; + template +struct TypeCastFunctor { + static T* Cast(ResourceBase* r) { return dynamic_cast(r); } +}; + +template Status ResourceMgr::LookupInternal(const string& container, const string& name, T** resource) const { ResourceBase* found = nullptr; @@ -472,12 +484,12 @@ Status ResourceMgr::LookupInternal(const string& container, const string& name, if (s.ok()) { // It's safe to down cast 'found' to T* since // typeid(T).hash_code() is part of the map key. 
- *resource = static_cast(found); + *resource = TypeCastFunctor::Cast(found); } return s; } -template +template Status ResourceMgr::LookupOrCreate(const string& container, const string& name, T** resource, std::function creator) { @@ -486,11 +498,11 @@ Status ResourceMgr::LookupOrCreate(const string& container, const string& name, Status s; { tf_shared_lock l(mu_); - s = LookupInternal(container, name, resource); + s = LookupInternal(container, name, resource); if (s.ok()) return s; } mutex_lock l(mu_); - s = LookupInternal(container, name, resource); + s = LookupInternal(container, name, resource); if (s.ok()) return s; TF_RETURN_IF_ERROR(creator(resource)); s = DoCreate(container, MakeTypeIndex(), name, *resource); @@ -566,11 +578,12 @@ Status CreateResource(OpKernelContext* ctx, const ResourceHandle& p, T* value) { return ctx->resource_manager()->Create(p.container(), p.name(), value); } -template +template Status LookupResource(OpKernelContext* ctx, const ResourceHandle& p, T** value) { TF_RETURN_IF_ERROR(internal::ValidateDeviceAndType(ctx, p)); - return ctx->resource_manager()->Lookup(p.container(), p.name(), value); + return ctx->resource_manager()->Lookup(p.container(), + p.name(), value); } template diff --git a/tensorflow/core/kernels/lookup_tables/BUILD b/tensorflow/core/kernels/lookup_tables/BUILD index 359caf6429..5cf628ef28 100644 --- a/tensorflow/core/kernels/lookup_tables/BUILD +++ b/tensorflow/core/kernels/lookup_tables/BUILD @@ -19,18 +19,6 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", - "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:span", - ], -) - -cc_library( - name = "table_resource_utils", - hdrs = ["table_resource_utils.h"], - deps = [ - ":lookup_table_interface", - "//tensorflow/core:framework", - "//tensorflow/core:lib", ], ) @@ -57,8 +45,8 @@ tf_kernel_library( "fingerprint64_map_ops.cc", ], deps = [ + ":lookup_table_interface", ":table_op_utils", - 
":table_resource_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", "@com_google_absl//absl/strings", diff --git a/tensorflow/core/kernels/lookup_tables/fingerprint64_map_ops.cc b/tensorflow/core/kernels/lookup_tables/fingerprint64_map_ops.cc index a000828c4b..65487d307e 100644 --- a/tensorflow/core/kernels/lookup_tables/fingerprint64_map_ops.cc +++ b/tensorflow/core/kernels/lookup_tables/fingerprint64_map_ops.cc @@ -15,8 +15,8 @@ limitations under the License. #include "absl/strings/string_view.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/kernels/lookup_tables/lookup_table_interface.h" #include "tensorflow/core/kernels/lookup_tables/table_op_utils.h" -#include "tensorflow/core/kernels/lookup_tables/table_resource_utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/macros.h" @@ -27,73 +27,48 @@ namespace tables { // Map x -> (Fingerprint64(x) % num_oov_buckets) + offset. // num_oov_buckets and offset are node attributes provided at construction // time. 
-template +template class Fingerprint64Map final - : public LookupTableInterface { + : public virtual LookupInterface, + public virtual LookupWithPrefetchInterface, + absl::Span> { public: + using key_type = KeyType; + Fingerprint64Map(int64 num_oov_buckets, int64 offset) : num_oov_buckets_(num_oov_buckets), offset_(offset) {} - mutex* GetMutex() const override { return nullptr; } - - bool UnsafeInsertOrAssign(const HeterogeneousKeyType& key, - const ValueType& value) override { - return true; - } - - Status TableUnbatchedInsertStatus() const override { - return errors::Unimplemented("Fingerprint64Map does not support inserts."); - } - - Status BatchInsertOrAssign(absl::Span keys, - absl::Span values) override { - return errors::Unimplemented("Fingerprint64Map does not support inserts."); - } - - ValueType UnsafeLookupKey( - const HeterogeneousKeyType& key_to_find) const override { - // This can cause a downcast. - return static_cast(Fingerprint64(key_to_find) % - num_oov_buckets_) + - offset_; + Status Lookup(const KeyType& key_to_find, ValueType* value) const override { + *value = LookupHelper(key_to_find); + return Status::OK(); } - Status TableUnbatchedLookupStatus() const override { return Status::OK(); } - - Status BatchLookup(absl::Span keys, - absl::Span values, - int64 prefetch_lookahead) const override { + Status Lookup(absl::Span keys, absl::Span values, + int64 prefetch_lookahead) const override { if (ABSL_PREDICT_FALSE(keys.size() != values.size())) { return errors::InvalidArgument( "keys and values do not have the same number of elements (found ", keys.size(), " vs ", values.size(), ")."); } for (size_t i = 0; i < keys.size(); ++i) { - values[i] = Fingerprint64Map::UnsafeLookupKey(keys[i]); + values[i] = LookupHelper(keys[i]); } return Status::OK(); } - const absl::optional DefaultValue() const override { - return {}; - } - - void UnsafePrefetchKey( - const HeterogeneousKeyType& key_to_find) const override {} - - size_t UnsafeSize() const override { 
return 0; } + mutex* GetMutex() const override { return nullptr; } - Status SizeStatus() const override { - return errors::Unimplemented( - "Fingerprint64Map does not have a concept of size."); - } + string DebugString() const override { return __PRETTY_FUNCTION__; } - bool UnsafeContainsKey( - const HeterogeneousKeyType& key_to_find) const override { - return true; + private: + ABSL_ATTRIBUTE_ALWAYS_INLINE ValueType + LookupHelper(const KeyType& key_to_find) const { + // This can cause a downcast. + return static_cast(Fingerprint64(key_to_find) % + num_oov_buckets_) + + offset_; } - private: const int64 num_oov_buckets_; const int64 offset_; TF_DISALLOW_COPY_AND_ASSIGN(Fingerprint64Map); @@ -102,9 +77,10 @@ class Fingerprint64Map final template struct Fingerprint64MapFactory { struct Functor { - template + using resource_type = Fingerprint64Map; + static Status AllocateContainer(OpKernelContext* ctx, OpKernel* kernel, - ContainerBase** container) { + Fingerprint64Map** container) { int64 num_oov_buckets; int64 offset; TF_RETURN_IF_ERROR( @@ -116,24 +92,28 @@ struct Fingerprint64MapFactory { }; }; -#define REGISTER_STRING_KERNEL(table_value_dtype) \ - REGISTER_KERNEL_BUILDER( \ - Name("Fingerprint64Map") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("heterogeneous_key_dtype") \ - .TypeConstraint("table_value_dtype"), \ - ResourceConstructionOp< \ - LookupTableInterface, \ - Fingerprint64MapFactory>::Functor>); \ - REGISTER_KERNEL_BUILDER( \ - Name("Fingerprint64Map") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("heterogeneous_key_dtype") \ - .TypeConstraint("table_value_dtype"), \ - ResourceConstructionOp, \ - Fingerprint64MapFactory>::Functor>); +template +using ResourceOp = ResourceConstructionOp< + typename Fingerprint64MapFactory< + Fingerprint64Map>::Functor, + // These are the aliases. 
+ LookupInterface, + LookupWithPrefetchInterface, + absl::Span>>; + +#define REGISTER_STRING_KERNEL(ValueType) \ + REGISTER_KERNEL_BUILDER( \ + Name("Fingerprint64Map") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("heterogeneous_key_dtype") \ + .TypeConstraint("table_value_dtype"), \ + ResourceOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Fingerprint64Map") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("heterogeneous_key_dtype") \ + .TypeConstraint("table_value_dtype"), \ + ResourceOp); REGISTER_STRING_KERNEL(int32); REGISTER_STRING_KERNEL(int64); diff --git a/tensorflow/core/kernels/lookup_tables/lookup_table_interface.h b/tensorflow/core/kernels/lookup_tables/lookup_table_interface.h index 0cfe44eda7..de6705d694 100644 --- a/tensorflow/core/kernels/lookup_tables/lookup_table_interface.h +++ b/tensorflow/core/kernels/lookup_tables/lookup_table_interface.h @@ -16,11 +16,6 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_LOOKUP_TABLE_INTERFACE_H_ #define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_LOOKUP_TABLE_INTERFACE_H_ -#include -#include - -#include "absl/types/optional.h" -#include "absl/types/span.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/mutex.h" @@ -28,90 +23,74 @@ limitations under the License. namespace tensorflow { namespace tables { -// Interface for key-value pair lookups with support for heterogeneous keys. -// This class contains two main kinds of methods: methods which operate on -// a batch of inputs and methods which do not. The latter have the prefix -// 'Unsafe'. Clients must call the corresponding status methods to determine -// whether they are safe to call within a code block. -// Implementations must guarantee thread-safety when GetMutex is used to -// synchronize method access. -template -class LookupTableInterface : public ResourceBase { +// Interface for resources with mutable state. 
+class SynchronizedInterface : public virtual ResourceBase { public: - using heterogeneous_key_type = HeterogeneousKeyType; - using value_type = ValueType; - using key_type = heterogeneous_key_type; - // Return value should be used to synchronize read/write access to // all public methods. If null, no synchronization is needed. virtual mutex* GetMutex() const = 0; +}; - // Insert the KV pair into the underlying table. If a key equivalent to key - // already exists in the underlying table, its corresponding value is - // overridden. Returns true only if the key was inserted for the first time. - // Undefined if TableUnbatchedInsertStatus() != OK. - virtual bool UnsafeInsertOrAssign(const HeterogeneousKeyType& key, - const ValueType& value) = 0; - - // Returns OK if it is safe to call InsertOrAssign. - // Once OK is returned, it is safe to call InsertOrAssign for the rest of the - // program. - virtual Status TableUnbatchedInsertStatus() const TF_MUST_USE_RESULT = 0; +// Interface for containers which support batch lookups. +template +class InsertOrAssignInterface : public virtual SynchronizedInterface { + public: + using value_type = ValueType; // Stores each KV pair {keys[i], values[i]} in the underlying map, overriding // pre-existing pairs which have equivalent keys. // keys and values should have the same size. - virtual Status BatchInsertOrAssign( - absl::Span keys, - absl::Span values) = 0; - - // Prefetch key_to_find into implementation defined data caches. - // Implementations are free to leave this a no-op. - // Undefined if TableUnbatchedLookupStatus() != OK. - virtual void UnsafePrefetchKey( - const HeterogeneousKeyType& key_to_find) const {} - - // Returns true if and only if the table contains key_to_find. - // Undefined if TableUnbatchedLookupStatus() != OK. - virtual bool UnsafeContainsKey( - const HeterogeneousKeyType& key_to_find) const = 0; - - // Lookup the value for key_to_find. 
This value must always be well-defined, - // even when ContainsKey(key_to_find) == false. When - // dv = DefaultValue() != absl::nullopt and ContainsKey(key_to_find) == false, - // dv is returned. - // Undefined if TableUnbatchedLookupStatus() != OK. - virtual ValueType UnsafeLookupKey( - const HeterogeneousKeyType& key_to_find) const = 0; - - // Returns OK if it is safe to call PrefetchKey, ContainsKey, and - // UnsafeLookupKey. - // If OK is returned, it is safe to call these methods until the next - // non-const method of this class is called. - virtual Status TableUnbatchedLookupStatus() const TF_MUST_USE_RESULT = 0; + virtual Status InsertOrAssign(KeyContext... key_context, + ValueType values) = 0; +}; + +// Interface for containers which support lookups. +template +class LookupInterface : public virtual SynchronizedInterface { + public: + using value_type = ValueType; // Lookup the values for keys and store them in values. // prefetch_lookahead is used to prefetch the key at index // i + prefetch_lookahead at the ith iteration of the implemented loop. // keys and values must have the same size. - virtual Status BatchLookup(absl::Span keys, - absl::Span values, - int64 prefetch_lookahead) const = 0; + virtual Status Lookup(KeyContext... key_context, ValueType values) const = 0; +}; - // Returns the number of elements in the table. - // Undefined if SizeStatus() != OK. - virtual size_t UnsafeSize() const = 0; +// Interface for containers which support lookups with prefetching. +template +class LookupWithPrefetchInterface : public virtual SynchronizedInterface { + public: + using value_type = ValueType; - // Returns OK if the return value of UnsafeSize() is always well-defined. - virtual Status SizeStatus() const TF_MUST_USE_RESULT = 0; + // Lookup the values for keys and store them in values. + // prefetch_lookahead is used to prefetch the key at index + // i + prefetch_lookahead at the ith iteration of the implemented loop. 
+ // keys and values must have the same size. + virtual Status Lookup(KeyContext... key_context, ValueType values, + int64 prefetch_lookahead) const = 0; +}; - // If non-null value is returned, LookupKey returns that value only for keys - // which satisfy ContainsKey(key_to_find) == false. - virtual const absl::optional DefaultValue() const = 0; +// Interface for containers with size concepts. +// Implementations must guarantee thread-safety when GetMutex is used to +// synchronize method access. +class SizeInterface : public virtual SynchronizedInterface { + public: + // Returns the number of elements in the container. + virtual uint64 Size() const = 0; +}; - string DebugString() const override { return "A lookup table"; } +// Interface for tables which can be initialized from key and value arguments. +template +class KeyValueTableInitializerInterface : public virtual SynchronizedInterface { + public: + using value_type = ValueType; - ~LookupTableInterface() override = default; + // Lookup the values for keys and store them in values. + // prefetch_lookahead is used to prefetch the key at index + // i + prefetch_lookahead at the ith iteration of the implemented loop. + // keys and values must have the same size. + virtual Status Initialize(KeyContext... key_context, ValueType values) = 0; }; } // namespace tables diff --git a/tensorflow/core/kernels/lookup_tables/table_op_utils.h b/tensorflow/core/kernels/lookup_tables/table_op_utils.h index ad7b0db78e..b4b2742266 100644 --- a/tensorflow/core/kernels/lookup_tables/table_op_utils.h +++ b/tensorflow/core/kernels/lookup_tables/table_op_utils.h @@ -44,11 +44,11 @@ limitations under the License. 
namespace tensorflow { namespace tables { -// Create resources of type ContainerBase using the static method +// Create resources of type ResourceType and AliasesToRegister using // Functor::AllocateContainer(OpKernelConstruction*, OpKernel*, -// ContainerBase**) -// If the resource has already been created it will be looked up. -template +// ResourceType**). ResourceType = Functor::resource_type. +// No-op for resources which have already been created. +template class ResourceConstructionOp : public OpKernel { public: explicit ResourceConstructionOp(OpKernelConstruction* ctx) @@ -66,46 +66,86 @@ class ResourceConstructionOp : public OpKernel { } auto creator = [ctx, - this](ContainerBase** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { - ContainerBase* container; - auto status = Functor::AllocateContainer(ctx, this, &container); + this](ResourceType** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + ResourceType* resource = nullptr; + auto status = Functor::AllocateContainer(ctx, this, &resource); if (ABSL_PREDICT_FALSE(!status.ok())) { - container->Unref(); + // Ideally resource is non-null only if status is OK but we try + // to compensate here. + if (resource != nullptr) { + resource->Unref(); + } return status; } if (ctx->track_allocations()) { - ctx->record_persistent_memory_allocation(container->MemoryUsed()); + ctx->record_persistent_memory_allocation(resource->MemoryUsed()); } - *ret = container; + *ret = resource; return Status::OK(); }; - ContainerBase* container_base = nullptr; + // Register the ResourceType alias. 
+ ResourceType* resource = nullptr; + core::ScopedUnref unref_me(resource); OP_REQUIRES_OK( - ctx, cinfo_.resource_manager()->template LookupOrCreate( - cinfo_.container(), cinfo_.name(), &container_base, creator)); - core::ScopedUnref unref_me(container_base); + ctx, + cinfo_.resource_manager()->template LookupOrCreate( + cinfo_.container(), cinfo_.name(), &resource, creator)); + // Put a handle to resource in the output tensor (the other aliases will + // have the same handle). Tensor* handle; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &handle)); - handle->scalar()() = MakeResourceHandle( + handle->scalar()() = MakeResourceHandle( ctx, cinfo_.container(), cinfo_.name()); table_handle_set_ = true; + + // Create other alias resources. + Status status; + char dummy[sizeof...(AliasesToRegister)] = { + (status.Update(RegisterAlias(resource)), 0)...}; + (void)dummy; + OP_REQUIRES_OK(ctx, status); } ~ResourceConstructionOp() override { // If the table object was not shared, delete it. if (table_handle_set_ && cinfo_.resource_is_private_to_kernel()) { if (!cinfo_.resource_manager() - ->template Delete(cinfo_.container(), - cinfo_.name()) + ->template Delete(cinfo_.container(), + cinfo_.name()) .ok()) { // Do nothing; the resource may have been deleted by session resets. } + // Attempt to delete other resource aliases. 
+ Status dummy_status; + char dummy[sizeof...(AliasesToRegister)] = { + (dummy_status.Update(DeleteAlias()), 0)...}; + (void)dummy; } } private: + using ResourceType = typename Functor::resource_type; + template + Status RegisterAlias(ResourceType* resource) { + auto creator = [resource](T** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + *ret = resource; + return Status::OK(); + }; + + T* alias_resource = nullptr; + core::ScopedUnref unref_me(alias_resource); + return cinfo_.resource_manager()->template LookupOrCreate( + cinfo_.container(), cinfo_.name(), &alias_resource, creator); + } + + template + Status DeleteAlias() { + return cinfo_.resource_manager()->template Delete(cinfo_.container(), + cinfo_.name()); + } + mutex mu_; bool table_handle_set_ GUARDED_BY(mu_); ContainerInfo cinfo_; @@ -120,8 +160,7 @@ class ResourceConstructionOp : public OpKernel { // If the resource has already been created it will be looked up. // Container must decrease the reference count of the FallbackTableBaseType* // constructor argument before its destructor completes. -template +template class TableWithFallbackConstructionOp : public OpKernel { public: explicit TableWithFallbackConstructionOp(OpKernelConstruction* ctx) @@ -140,13 +179,14 @@ class TableWithFallbackConstructionOp : public OpKernel { return; } + // Look up the fallback table. 
FallbackTableBaseType* fallback_table = nullptr; { const Tensor& table_handle = ctx->input(table_int64_args.size()); ResourceHandle handle(table_handle.scalar()()); OP_REQUIRES_OK( - ctx, ctx->resource_manager()->Lookup(handle.container(), - handle.name(), &fallback_table)); + ctx, ctx->resource_manager()->Lookup( + handle.container(), handle.name(), &fallback_table)); } mutex_lock l(mu_); @@ -156,51 +196,93 @@ class TableWithFallbackConstructionOp : public OpKernel { } auto creator = [ctx, this, fallback_table]( - ContainerBase** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + ResourceType** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { // container construction logic can't be merged with // ResourceConstructionOp because Container constructor requires an // input which can only be constructed if the resource manager // internal lock is not already held. - ContainerBase* container; + ResourceType* resource = nullptr; auto status = - Functor::AllocateContainer(ctx, this, fallback_table, &container); + Functor::AllocateContainer(ctx, this, fallback_table, &resource); if (ABSL_PREDICT_FALSE(!status.ok())) { - container->Unref(); + // Ideally resource is non-null only if status is OK but we try + // to compensate here. + if (resource != nullptr) { + resource->Unref(); + } return status; } if (ctx->track_allocations()) { - ctx->record_persistent_memory_allocation(container->MemoryUsed()); + ctx->record_persistent_memory_allocation(resource->MemoryUsed()); } - *ret = container; + *ret = resource; return Status::OK(); }; - ContainerBase* table = nullptr; - OP_REQUIRES_OK( - ctx, cinfo_.resource_manager()->template LookupOrCreate( - cinfo_.container(), cinfo_.name(), &table, creator)); + // Register the ResourceType alias. 
+ ResourceType* table = nullptr; core::ScopedUnref unref_me(table); + OP_REQUIRES_OK( + ctx, + cinfo_.resource_manager()->template LookupOrCreate( + cinfo_.container(), cinfo_.name(), &table, creator)); + // Put a handle to resource in the output tensor (the other aliases will + // have the same handle). Tensor* handle; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &handle)); - handle->scalar()() = MakeResourceHandle( + handle->scalar()() = MakeResourceHandle( ctx, cinfo_.container(), cinfo_.name()); table_handle_set_ = true; + + // Create other alias resources. + Status status; + char dummy[sizeof...(AliasesToRegister)] = { + (status.Update(RegisterAlias(table)), 0)...}; + (void)dummy; + OP_REQUIRES_OK(ctx, status); } ~TableWithFallbackConstructionOp() override { // If the table object was not shared, delete it. if (table_handle_set_ && cinfo_.resource_is_private_to_kernel()) { if (!cinfo_.resource_manager() - ->template Delete(cinfo_.container(), - cinfo_.name()) + ->template Delete(cinfo_.container(), + cinfo_.name()) .ok()) { // Do nothing; the resource may have been deleted by session resets. } + // Attempt to delete other resource aliases. 
+ Status dummy_status; + char dummy[sizeof...(AliasesToRegister)] = { + (dummy_status.Update(DeleteAlias()), 0)...}; + (void)dummy; } } private: + using ResourceType = typename Functor::resource_type; + using FallbackTableBaseType = typename Functor::fallback_table_type; + + template + Status RegisterAlias(ResourceType* resource) { + auto creator = [resource](T** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { + *ret = resource; + return Status::OK(); + }; + + T* alias_resource = nullptr; + core::ScopedUnref unref_me(alias_resource); + return cinfo_.resource_manager()->template LookupOrCreate( + cinfo_.container(), cinfo_.name(), &alias_resource, creator); + } + + template + Status DeleteAlias() { + return cinfo_.resource_manager()->template Delete(cinfo_.container(), + cinfo_.name()); + } + mutex mu_; bool table_handle_set_ GUARDED_BY(mu_); ContainerInfo cinfo_; @@ -209,33 +291,29 @@ class TableWithFallbackConstructionOp : public OpKernel { TF_DISALLOW_COPY_AND_ASSIGN(TableWithFallbackConstructionOp); }; -// Used to insert tensors into a container. -template -class HeterogeneousLookupTableInsertOrAssignOp : public OpKernel { +// Lookup a table of type ResourceAlias and insert the passed in keys and +// values tensors using Functor::TensorInsert(keys, values, table). +template +class LookupTableInsertOp : public OpKernel { public: - explicit HeterogeneousLookupTableInsertOrAssignOp(OpKernelConstruction* ctx) - : OpKernel(ctx) {} + explicit LookupTableInsertOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { OpInputList table_int64_args; OP_REQUIRES_OK(ctx, ctx->input_list("table_int64_args", &table_int64_args)); const size_t tensor_index_offset = table_int64_args.size(); + // Business logic for checking tensor shapes, etc, is delegated to the + // Functor. 
const Tensor& keys = ctx->input(tensor_index_offset + 1); const Tensor& values = ctx->input(tensor_index_offset + 2); - if (ABSL_PREDICT_FALSE(keys.NumElements() != values.NumElements())) { - ctx->SetStatus(errors::InvalidArgument( - "keys and values do not have the same number of elements: ", - keys.NumElements(), " vs ", values.NumElements())); - return; - } const Tensor& table_handle = ctx->input(tensor_index_offset); ResourceHandle handle(table_handle.scalar()()); - Container* table; - OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup(handle.container(), - handle.name(), &table)); + ResourceAlias* table; core::ScopedUnref unref_me(table); + OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup( + handle.container(), handle.name(), &table)); int memory_used_before = 0; if (ctx->track_allocations()) { @@ -244,9 +322,9 @@ class HeterogeneousLookupTableInsertOrAssignOp : public OpKernel { auto* mutex = table->GetMutex(); if (mutex != nullptr) { mutex_lock lock(*mutex); - OP_REQUIRES_OK(ctx, TensorInsert(keys, values, table)); + OP_REQUIRES_OK(ctx, Functor::TensorInsert(keys, values, table)); } else { - OP_REQUIRES_OK(ctx, TensorInsert(keys, values, table)); + OP_REQUIRES_OK(ctx, Functor::TensorInsert(keys, values, table)); } if (ctx->track_allocations()) { ctx->record_persistent_memory_allocation(table->MemoryUsed() - @@ -255,74 +333,17 @@ class HeterogeneousLookupTableInsertOrAssignOp : public OpKernel { } private: - // Non-variant InsertKeyTensorType which is the same as Container::key_type. - // No need to static_cast. 
- template - absl::enable_if_t< - IsValidDataType::value && - std::is_same::value, - Status> - TensorInsert(const Tensor& keys, const Tensor& values, - Container* table) const { - const auto keys_flat = keys.flat(); - const auto values_flat = values.flat(); - return table->BatchInsertOrAssign( - absl::MakeSpan(keys_flat.data(), keys_flat.size()), - absl::MakeSpan(values_flat.data(), values_flat.size())); - } - - // Non-variant InsertKeyTensorType which is otherwise convertible to - // Container::key_type. - template - absl::enable_if_t< - IsValidDataType::value && - !std::is_same::value && - std::is_convertible::value, - Status> - TensorInsert(const Tensor& keys, const Tensor& values, - Container* table) const { - const auto keys_flat = keys.flat(); - std::vector keys_vec; - const auto keys_size = keys_flat.size(); - keys_vec.reserve(keys_size); - for (size_t i = 0; i < keys_size; ++i) { - keys_vec.push_back( - static_cast(keys_flat(i))); - } - const auto values_flat = values.flat(); - return table->BatchInsertOrAssign( - keys_vec, absl::MakeSpan(values_flat.data(), values_flat.size())); - } - - // Variant InsertKeyTensorType; the wrapped type is convertible to - // Container::key_type. - template - absl::enable_if_t< - !IsValidDataType::value && - std::is_convertible::value, - Status> - TensorInsert(const Tensor& keys, const Tensor& values, - Container* table) const { - const auto keys_flat = keys.flat(); - std::vector keys_vec; - keys_vec.reserve(keys_flat.size()); - for (size_t i = 0; i < keys_flat.size(); ++i) { - keys_vec.emplace_back( - *keys_flat(i).get()); - } - const auto values_flat = values.flat(); - return table->BatchInsertOrAssign( - keys_vec, absl::MakeSpan(values_flat.data(), values_flat.size())); - } + TF_DISALLOW_COPY_AND_ASSIGN(LookupTableInsertOp); }; -// Used for tensor lookups. 
-template -class HeterogeneousLookupTableFindOp : public OpKernel { +// Lookup a table of type ResourceAlias and look up the passed in keys using +// Functor::TensorLookup( +// table, keys, prefetch_lookahead, num_keys_per_thread, threadpool, out). +template +class LookupTableFindOp : public OpKernel { public: - explicit HeterogeneousLookupTableFindOp(OpKernelConstruction* ctx) - : OpKernel(ctx) {} + explicit LookupTableFindOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} void Compute(OpKernelContext* ctx) override { OpInputList table_int64_args; @@ -370,10 +391,10 @@ class HeterogeneousLookupTableFindOp : public OpKernel { const Tensor& table_handle = ctx->input(tensor_index_offset); ResourceHandle handle(table_handle.scalar()()); - Container* table; - OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup(handle.container(), - handle.name(), &table)); + ResourceAlias* table; core::ScopedUnref unref_me(table); + OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup( + handle.container(), handle.name(), &table)); auto* mutex = table->GetMutex(); auto* threadpool = ctx->device()->tensorflow_cpu_worker_threads()->workers; @@ -382,112 +403,20 @@ class HeterogeneousLookupTableFindOp : public OpKernel { // writer lock here. mutex_lock lock(*mutex); OP_REQUIRES_OK( - ctx, TensorLookup(*table, prefetch_lookahead, num_keys_per_thread, - keys, out, threadpool)); + ctx, Functor::TensorLookup(*table, keys, prefetch_lookahead, + num_keys_per_thread, threadpool, out)); } else { OP_REQUIRES_OK( - ctx, TensorLookup(*table, prefetch_lookahead, num_keys_per_thread, - keys, out, threadpool)); - } - } - - private: - // keys and *values arguments to TensorLookup must have the same number of - // elements. This is guaranteed above. - - // 'Simple' types below are types which are not natively supported in TF. - // Simple LookupKeyTensorType which is the same as Container::key_type. 
- template - absl::enable_if_t< - IsValidDataType::value && - std::is_same::value, - Status> - TensorLookup(Container& table, int64 prefetch_lookahead, - int64 num_keys_per_thread, const Tensor& keys, Tensor* values, - thread::ThreadPool* threadpool) const { - const auto keys_flat = keys.flat(); - const auto keys_size = keys_flat.size(); - auto key_span = absl::MakeSpan(keys_flat.data(), keys_size); - auto value_span = absl::MakeSpan(values->flat().data(), - values->NumElements()); - return MultithreadedTensorLookup(table, prefetch_lookahead, - num_keys_per_thread, key_span, value_span, - threadpool); - } - - // Try to implicitly convert all other simple LookupKeyTensorTypes to - // Container::key_type. - template - absl::enable_if_t< - IsValidDataType::value && - !std::is_same::value, - Status> - TensorLookup(Container& table, int64 prefetch_lookahead, - int64 num_keys_per_thread, const Tensor& keys, Tensor* values, - thread::ThreadPool* threadpool) const { - const auto keys_flat = keys.flat(); - std::vector keys_vec; - const auto keys_size = keys_flat.size(); - keys_vec.reserve(keys_size); - for (size_t i = 0; i < keys_size; ++i) { - keys_vec.emplace_back(keys_flat(i)); - } - absl::Span key_span(keys_vec); - auto value_span = absl::MakeSpan(values->flat().data(), - values->NumElements()); - return MultithreadedTensorLookup(table, prefetch_lookahead, - num_keys_per_thread, key_span, value_span, - threadpool); - } - - // Non-simple LookupKeyTensorType. We'll try an implicit conversion to - // Container::key_type. 
- template - absl::enable_if_t::value, Status> - TensorLookup(Container& table, int64 prefetch_lookahead, - int64 num_keys_per_thread, const Tensor& keys, Tensor* values, - thread::ThreadPool* threadpool) const { - const auto keys_flat = keys.flat(); - std::vector keys_vec; - const auto keys_size = keys_flat.size(); - keys_vec.reserve(keys_size); - for (size_t i = 0; i < keys_size; ++i) { - keys_vec.emplace_back( - *keys_flat(i).get()); + ctx, Functor::TensorLookup(*table, keys, prefetch_lookahead, + num_keys_per_thread, threadpool, out)); } - absl::Span key_span(keys_vec); - auto value_span = absl::MakeSpan(values->flat().data(), - values->NumElements()); - return MultithreadedTensorLookup(table, prefetch_lookahead, - num_keys_per_thread, key_span, value_span, - threadpool); - } - - // Wrapper around table.BatchLookup which permits sharding across cores. - template - Status MultithreadedTensorLookup(Container& table, int64 prefetch_lookahead, - int64 num_keys_per_thread, - absl::Span keys, absl::Span values, - thread::ThreadPool* threadpool) const { - mutex temp_mutex; // Protect status. - Status status; - auto lookup_keys = [&, this](int64 begin, int64 end) { - auto temp_status = table.BatchLookup(keys.subspan(begin, end - begin), - values.subspan(begin, end - begin), - prefetch_lookahead); - if (ABSL_PREDICT_FALSE(!temp_status.ok())) { - mutex_lock lock(temp_mutex); - status.Update(temp_status); - } - }; - threadpool->TransformRangeConcurrently(num_keys_per_thread /* block_size */, - keys.size(), lookup_keys); - return status; } }; -// Op that returns the size of a container. -template +// Lookup a container of type ResourceAlias and return its size using +// Functor::Size(container, &size). 
+template class ContainerSizeOp : public OpKernel { public: explicit ContainerSizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} @@ -495,11 +424,10 @@ class ContainerSizeOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& container_handle = ctx->input(0); ResourceHandle handle(container_handle.scalar()()); - Container* container; - OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup( - handle.container(), handle.name(), &container)); + ResourceAlias* container; core::ScopedUnref unref_me(container); - OP_REQUIRES_OK(ctx, container->SizeStatus()); + OP_REQUIRES_OK(ctx, ctx->resource_manager()->Lookup( + handle.container(), handle.name(), &container)); Tensor* out; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &out)); @@ -507,9 +435,9 @@ class ContainerSizeOp : public OpKernel { auto* mutex = container->GetMutex(); if (mutex != nullptr) { tf_shared_lock lock(*mutex); - out->scalar()() = container->UnsafeSize(); + OP_REQUIRES_OK(ctx, Functor::Size(*container, &out->scalar()())); } else { - out->scalar()() = container->UnsafeSize(); + OP_REQUIRES_OK(ctx, Functor::Size(*container, &out->scalar()())); } } }; diff --git a/tensorflow/core/kernels/lookup_tables/table_resource_utils.h b/tensorflow/core/kernels/lookup_tables/table_resource_utils.h deleted file mode 100644 index 742086cb21..0000000000 --- a/tensorflow/core/kernels/lookup_tables/table_resource_utils.h +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_RESOURCE_UTILS_H_ -#define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_RESOURCE_UTILS_H_ - -#include - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/kernels/lookup_tables/lookup_table_interface.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/platform/mutex.h" - -namespace tensorflow { -namespace tables { - -// Parent class for tables with support for multithreaded synchronization. -template -class LookupTableWithSynchronization - : public LookupTableInterface { - public: - LookupTableWithSynchronization(bool enable_synchronization) { - if (enable_synchronization) { - mutex_ = absl::make_unique(); - } - } - - // Mutex for synchronizing access to unsynchronized methods. - mutex* GetMutex() const override { return mutex_.get(); } - - private: - // Use this for locking. - mutable std::unique_ptr mutex_; -}; - -// Parent class for tables which can be constructed with arbitrary -// lookup fallbacks. -// Since LookupTableInterface::LookupKey assumes that all keys can be mapped -// to values, LookupTableWithFallbackInterface allows clients to implement -// two-stage lookups. If the first key lookup fails, clients can choose -// to perform a fallback lookup using an externally supplied table. 
-template > -class LookupTableWithFallbackInterface - : public LookupTableWithSynchronization { - public: - LookupTableWithFallbackInterface(bool enable_synchronization, - const FallbackTableBaseType* fallback_table) - : LookupTableWithSynchronization( - enable_synchronization), - fallback_table_(fallback_table) {} - - // Clients are required to fail when ctx is set to a not-OK status in - // the constructor so this dereference is safe. - const FallbackTableBaseType& fallback_table() const { - return *fallback_table_; - } - - ~LookupTableWithFallbackInterface() override { - if (fallback_table_ != nullptr) { - fallback_table_->Unref(); - } - } - - private: - const FallbackTableBaseType* fallback_table_; -}; - -} // namespace tables -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_RESOURCE_UTILS_H_ -- GitLab From 6249068668847933066ee2119cc6c88e4c57009f Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Thu, 14 Feb 2019 14:31:43 -0800 Subject: [PATCH 159/351] Adds tests to lite_test.py to test functions in 1.X. 1. Adds support for a Variable --> Identity --> ReadVariableOp graph to convert_variables_to_constants. 2. Adds a Grappler pass before freezing the graph in lite.py in order to inline. 
PiperOrigin-RevId: 234030833 --- tensorflow/lite/python/lite.py | 103 ++++++++++-------- tensorflow/lite/python/lite_test.py | 45 ++++++++ .../model_coverage/model_coverage_lib_test.py | 22 ++++ .../python/framework/graph_util_impl.py | 20 +++- .../python/framework/graph_util_test.py | 14 ++- 5 files changed, 148 insertions(+), 56 deletions(-) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index 40efbe5392..a05dc28f79 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -38,6 +38,10 @@ from six import PY3 from google.protobuf import text_format as _text_format from google.protobuf.message import DecodeError +from tensorflow.core.framework import graph_pb2 as _graph_pb2 +from tensorflow.core.protobuf import config_pb2 as _config_pb2 +from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 as _rewriter_config_pb2 from tensorflow.lite.python import lite_constants as constants from tensorflow.lite.python.convert import build_toco_convert_protos # pylint: disable=unused-import from tensorflow.lite.python.convert import ConverterError # pylint: disable=unused-import @@ -54,15 +58,12 @@ from tensorflow.lite.python.interpreter import Interpreter # pylint: disable=un from tensorflow.lite.python.op_hint import convert_op_hints_to_stubs # pylint: disable=unused-import from tensorflow.lite.python.op_hint import OpHint # pylint: disable=unused-import from tensorflow.lite.python.optimize import calibrator as _calibrator -from tensorflow.core.framework import graph_pb2 as _graph_pb2 -from tensorflow.core.protobuf import rewriter_config_pb2 as _rewriter_config_pb2 -from tensorflow.core.protobuf import config_pb2 as _config_pb2 -from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2 from tensorflow.python import keras as _keras from tensorflow.python.client import session as _session from tensorflow.python.framework import graph_util 
as _tf_graph_util from tensorflow.python.framework import ops as _ops from tensorflow.python.framework.errors_impl import NotFoundError as _NotFoundError +from tensorflow.python.framework.errors_impl import OpError as _OpError from tensorflow.python.framework.importer import import_graph_def as _import_graph_def from tensorflow.python.grappler import tf_optimizer as _tf_optimizer from tensorflow.python.lib.io import file_io as _file_io @@ -73,35 +74,6 @@ from tensorflow.python.util import deprecation as _deprecation from tensorflow.python.util.tf_export import tf_export as _tf_export -def _run_graph_optimizations(graph_def, input_arrays, output_arrays): - """Apply standard TensorFlow optimizations to the graph_def. - - Args: - graph_def: Frozen GraphDef to be optimized. - input_arrays: List of arrays that are considered inputs of the graph. - output_arrays: List of arrays that are considered outputs of the graph. - - Returns: - A new, optimized GraphDef. - """ - meta_graph = _export_meta_graph(graph_def=graph_def) - - # We need to add a collection called 'train_op' so that grappler - # knows what the outputs are. - fetch_collection = _meta_graph_pb2.CollectionDef() - for array in input_arrays + output_arrays: - fetch_collection.node_list.value.append(array.name) - meta_graph.collection_def["train_op"].CopyFrom(fetch_collection) - - config = _config_pb2.ConfigProto() - rewrite_options = config.graph_options.rewrite_options - rewrite_options.layout_optimizer = _rewriter_config_pb2.RewriterConfig.ON - # Avoid remapping as it creates ops like _FusedConv2D, which are not - # supported by TF Lite. - rewrite_options.remapping = _rewriter_config_pb2.RewriterConfig.OFF - return _tf_optimizer.OptimizeGraph(config, meta_graph) - - @_tf_export("lite.Optimize") class Optimize(enum.Enum): """Enum defining the optimizations to apply when generating tflite graphs. @@ -311,7 +283,7 @@ class TFLiteConverter(object): Returns: TFLiteConverter class. 
""" - graph_def = _freeze_graph(sess, output_tensors) + graph_def = _freeze_graph(sess, input_tensors, output_tensors) return cls(graph_def, input_tensors, output_tensors) @classmethod @@ -484,7 +456,7 @@ class TFLiteConverter(object): output_tensors = keras_model.outputs _set_tensor_shapes(input_tensors, input_shapes) - graph_def = _freeze_graph(sess, output_tensors) + graph_def = _freeze_graph(sess, input_tensors, output_tensors) return cls(graph_def, input_tensors, output_tensors) def __setattr__(self, name, value): @@ -586,26 +558,26 @@ class TFLiteConverter(object): "dump_graphviz_video": self.dump_graphviz_video } - optimized_graph = None - if self.inference_type == constants.QUANTIZED_UINT8: - optimized_graph = self._graph_def - else: + # Run a Grappler pass if it is possible. + graph_def = self._graph_def + if self.inference_type != constants.QUANTIZED_UINT8: try: - optimized_graph = _run_graph_optimizations( + graph_def = _run_graph_optimizations( self._graph_def, self._input_tensors, self._output_tensors) - except Exception: - optimized_graph = self._graph_def + except (_OpError, ValueError): + print("Warning: Grappler optimization pass failed. " + "If this behavior is unexpected, please file a bug.") # Converts model. if self._has_valid_tensors(): result = _toco_convert_impl( - input_data=optimized_graph, + input_data=graph_def, input_tensors=self._input_tensors, output_tensors=self._output_tensors, **converter_kwargs) else: result = _toco_convert_graph_def( - input_data=optimized_graph, + input_data=graph_def, input_arrays_with_shape=self._input_arrays_with_shape, output_arrays=self._output_arrays, **converter_kwargs) @@ -710,6 +682,35 @@ class TocoConverter(object): input_shapes, output_arrays) +def _run_graph_optimizations(graph_def, input_arrays, output_arrays): + """Apply standard TensorFlow optimizations to the graph_def. + + Args: + graph_def: Frozen GraphDef to be optimized. + input_arrays: List of arrays that are considered inputs of the graph. 
+ output_arrays: List of arrays that are considered outputs of the graph. + + Returns: + A new, optimized GraphDef. + """ + meta_graph = _export_meta_graph(graph_def=graph_def) + + # We need to add a collection called 'train_op' so that grappler + # knows what the outputs are. + fetch_collection = _meta_graph_pb2.CollectionDef() + for array in input_arrays + output_arrays: + fetch_collection.node_list.value.append(array.name) + meta_graph.collection_def["train_op"].CopyFrom(fetch_collection) + + config = _config_pb2.ConfigProto() + rewrite_options = config.graph_options.rewrite_options + rewrite_options.layout_optimizer = _rewriter_config_pb2.RewriterConfig.OFF + # Avoid remapping as it creates ops like _FusedConv2D, which are not + # supported by TF Lite. + rewrite_options.remapping = _rewriter_config_pb2.RewriterConfig.OFF + return _tf_optimizer.OptimizeGraph(config, meta_graph) + + def _is_frozen_graph(sess): """Determines if the graph is frozen. @@ -728,22 +729,28 @@ def _is_frozen_graph(sess): return True -def _freeze_graph(sess, output_tensors): +def _freeze_graph(sess, input_tensors, output_tensors): """Returns a frozen GraphDef. - Freezes a graph with Variables in it. Otherwise the existing GraphDef is - returned. + Runs a Grappler pass and freezes a graph with Variables in it. Otherwise the + existing GraphDef is returned. The Grappler pass is only run on models that + are frozen in order to inline the functions in the graph. Args: sess: TensorFlow Session. + input_tensors: List of input tensors. output_tensors: List of output tensors (only .name is used from this). Returns: Frozen GraphDef. """ + # Runs a Grappler pass in order to inline any functions in the graph. 
+ graph_def = _run_graph_optimizations(sess.graph_def, input_tensors, + output_tensors) + if not _is_frozen_graph(sess): output_arrays = [_tensor_name(tensor) for tensor in output_tensors] return _tf_graph_util.convert_variables_to_constants( - sess, sess.graph_def, output_arrays) + sess, graph_def, output_arrays) else: return sess.graph_def diff --git a/tensorflow/lite/python/lite_test.py b/tensorflow/lite/python/lite_test.py index d41b7a75fd..810de4be54 100644 --- a/tensorflow/lite/python/lite_test.py +++ b/tensorflow/lite/python/lite_test.py @@ -27,13 +27,16 @@ from tensorflow.lite.python import lite_constants from tensorflow.lite.python.interpreter import Interpreter from tensorflow.python import keras from tensorflow.python.client import session +from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables from tensorflow.python.ops.variables import global_variables_initializer as _global_variables_initializer from tensorflow.python.platform import gfile from tensorflow.python.platform import resource_loader @@ -597,6 +600,48 @@ class FromSessionTest(test_util.TensorFlowTestCase): interpreter = Interpreter(model_content=tflite_model) interpreter.allocate_tensors() + def testFunctions(self): + """Tests tf.function in 1.X.""" + + @def_function.function + def plus_placeholder(x, placeholder): + return x + placeholder + + with ops.Graph().as_default(): + placeholder = array_ops.placeholder( + dtype=dtypes.float32, shape=[1], name='input') + variable_node = variables.Variable(1.0, name='variable_node') + defun_node = plus_placeholder(variable_node, placeholder) + 
output_node = math_ops.multiply(defun_node, 2.0, name='output_node') + + # Initialize variables in the model. + sess = session.Session() + sess.run(variables.variables_initializer([variable_node])) + + # Convert model and ensure model is not None. + converter = lite.TFLiteConverter.from_session(sess, [placeholder], + [output_node]) + tflite_model = converter.convert() + self.assertTrue(tflite_model) + + # Check values from converted model. + interpreter = Interpreter(model_content=tflite_model) + interpreter.allocate_tensors() + + input_details = interpreter.get_input_details() + self.assertEqual(1, len(input_details)) + self.assertEqual('input', input_details[0]['name']) + self.assertEqual(np.float32, input_details[0]['dtype']) + self.assertTrue(([1] == input_details[0]['shape']).all()) + self.assertEqual((0., 0.), input_details[0]['quantization']) + + output_details = interpreter.get_output_details() + self.assertEqual(1, len(output_details)) + self.assertEqual('output_node', output_details[0]['name']) + self.assertEqual(np.float32, output_details[0]['dtype']) + self.assertTrue(([1] == output_details[0]['shape']).all()) + self.assertEqual((0., 0.), output_details[0]['quantization']) + @test_util.run_v1_only('b/120545219') class FromFrozenGraphFile(test_util.TensorFlowTestCase): diff --git a/tensorflow/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/lite/testing/model_coverage/model_coverage_lib_test.py index 4e329ac97d..d7dd4c43a3 100644 --- a/tensorflow/lite/testing/model_coverage/model_coverage_lib_test.py +++ b/tensorflow/lite/testing/model_coverage/model_coverage_lib_test.py @@ -26,8 +26,10 @@ from tensorflow.lite.python import lite from tensorflow.lite.testing.model_coverage import model_coverage_lib as model_coverage from tensorflow.python import keras from tensorflow.python.client import session +from tensorflow.python.eager import def_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import 
dtypes +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -70,6 +72,26 @@ class EvaluateFrozenGraph(test.TestCase): model_coverage.test_frozen_graph(filename, ['inputA', 'inputB'], ['add', 'Mean']) + @test_util.run_v1_only('b/120545219') + def testFunctions(self): + + @def_function.function + def plus_placeholder(x, placeholder): + return x + placeholder + + with ops.Graph().as_default(): + placeholder = array_ops.placeholder( + dtype=dtypes.float32, shape=[1], name='input') + variable_node = constant_op.constant(1.0, name='variable_node') + defun_node = plus_placeholder(variable_node, placeholder) + _ = math_ops.multiply(defun_node, 2.0, name='output_node') + + # Initialize variables in the model. + sess = session.Session() + + filename = self._saveFrozenGraph(sess) + model_coverage.test_frozen_graph(filename, ['input'], ['output_node']) + def _getQuantizedModel(self): np.random.seed(0) with session.Session().as_default() as sess: diff --git a/tensorflow/python/framework/graph_util_impl.py b/tensorflow/python/framework/graph_util_impl.py index a46fccc513..50cdb7a15d 100644 --- a/tensorflow/python/framework/graph_util_impl.py +++ b/tensorflow/python/framework/graph_util_impl.py @@ -248,6 +248,15 @@ def convert_variables_to_constants(sess, found_variables = {} variable_names = [] variable_dict_names = [] + identity_ops_input_map = {} + + def is_found_variable(input_tensor_name): + # Determines if the `input_tensor_name` is in `found_variables` or is an + # Identity op with an input that is in `found_variables`. 
+ return ((input_tensor_name in found_variables) or + (input_tensor_name in identity_ops_input_map and + identity_ops_input_map[input_tensor_name] in found_variables)) + for node in inference_graph.node: if node.op in ["Variable", "VariableV2", "VarHandleOp"]: variable_name = node.name @@ -261,6 +270,9 @@ def convert_variables_to_constants(sess, variable_names.append(variable_name + "/Read/ReadVariableOp:0") else: variable_names.append(variable_name + ":0") + elif node.op == "Identity": + # Creates a map of Identity node names to the input names. + identity_ops_input_map[node.name] = node.input[0].split(":")[0] if variable_names: returned_variables = sess.run(variable_names) else: @@ -283,11 +295,15 @@ def convert_variables_to_constants(sess, tensor=tensor_util.make_tensor_proto( data, dtype=dtype.type, shape=data.shape))) how_many_converted += 1 - elif input_node.op == "ReadVariableOp" and ( - input_node.input[0] in found_variables): + elif (input_node.op == "ReadVariableOp" and + is_found_variable(input_node.input[0])): # The preceding branch converts all VarHandleOps of ResourceVariables to # constants, so we need to convert the associated ReadVariableOps to # Identity ops. 
+ # + # Handles the following cases: + # Variable --> ReadVariableOp + # Variable --> Identity --> ReadVariableOp output_node.op = "Identity" output_node.name = input_node.name output_node.input.extend([input_node.input[0]]) diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py index dd26b8a78e..6802586ef6 100644 --- a/tensorflow/python/framework/graph_util_test.py +++ b/tensorflow/python/framework/graph_util_test.py @@ -217,16 +217,18 @@ class DeviceFunctionsTest(test.TestCase): self.assertNear(4.0, output, 0.00001) variable_graph_def = sess.graph.as_graph_def() - # First get the constant_graph_def when variable_names_whitelist is set, - # note that if variable_names_whitelist is not set an error will be - # thrown because unused_variable_node is not initialized. + # Get the constant_graph_def. constant_graph_def = graph_util.convert_variables_to_constants( - sess, - variable_graph_def, ["output_node"], - variable_names_whitelist=set(["variable_node"])) + sess, variable_graph_def, ["output_node"]) + # Ensure the library is copied and there are no variables after + # freezing. self.assertEqual(variable_graph_def.library, constant_graph_def.library) + for node in constant_graph_def.node: + self.assertNotIn( + node.op, + ["Variable", "VariableV2", "VarHandleOp", "ReadVariableOp"]) def testConvertVariablesToConsts(self): self._test_variable_to_const_conversion(use_resource=False) -- GitLab From d9d4dccb41c45508653d5e7e507d65303151c40b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 14:44:00 -0800 Subject: [PATCH 160/351] DepthwiseConv dot-product optimization: C model code (non-depth-mult) part I. 
PiperOrigin-RevId: 234033274 --- tensorflow/lite/kernels/internal/BUILD | 6 +- .../internal/depthwiseconv_quantized_test.cc | 73 +- .../internal/optimized/depthwiseconv_uint8.h | 17 +- .../depthwiseconv_uint8_3x3_filter.h | 62 +- .../depthwiseconv_uint8_transitional.h | 711 ++++++++++++++++++ .../internal/reference/depthwiseconv_uint8.h | 7 +- 6 files changed, 821 insertions(+), 55 deletions(-) create mode 100644 tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h diff --git a/tensorflow/lite/kernels/internal/BUILD b/tensorflow/lite/kernels/internal/BUILD index 37a99bcc0c..2eb31c74b8 100644 --- a/tensorflow/lite/kernels/internal/BUILD +++ b/tensorflow/lite/kernels/internal/BUILD @@ -587,7 +587,10 @@ cc_test( cc_test( name = "depthwiseconv_quantized_test", - srcs = ["depthwiseconv_quantized_test.cc"], + srcs = [ + "depthwiseconv_quantized_test.cc", + "optimized/depthwiseconv_uint8_transitional.h", + ], shard_count = 2, deps = [ ":optimized_base", @@ -596,6 +599,7 @@ cc_test( ":types", "@com_google_absl//absl/strings", "@com_google_googletest//:gtest_main", + "@gemmlowp", ], ) diff --git a/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc b/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc index b396e6256c..a990e57cdf 100644 --- a/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc +++ b/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc @@ -30,6 +30,7 @@ limitations under the License. 
#include "absl/strings/substitute.h" #include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h" #include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h" +#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h" #include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h" namespace tflite { @@ -57,7 +58,7 @@ enum class CoverageExtension { // The TestParam structure below is the preferred parameterization of tests. A // tuple version is defined in order to support value-parameterized tests. -typedef std::tuple TestParamTuple; @@ -82,7 +83,8 @@ struct TestParam { param.test_depth_multiplier); } - DepthwiseConvInvocation forced_invocation = DepthwiseConvInvocation::kNone; + DepthwiseConvImplementation forced_invocation = + DepthwiseConvImplementation::kNone; int tests_to_run = 0; bool test_stride = false; bool test_pad = false; @@ -99,7 +101,7 @@ inline void DispatchDepthwiseConv( const RuntimeShape& bias_shape, const int32* bias_data, const RuntimeShape& output_shape, uint8* output_data) { switch (test_param.forced_invocation) { - case DepthwiseConvInvocation::kUseNeon3x3: { + case DepthwiseConvImplementation::kUseNeon3x3: { // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on // Jetson TX-2. This compiler does not support the offsetof() macro. #if defined(__aarch64__) && !defined(GOOGLE_L4T) @@ -114,7 +116,7 @@ inline void DispatchDepthwiseConv( // Check that parameter combination is supported. 
const bool basic_3x3_kernel_supported = - optimized_ops::Fast3x3FilterKernelSupported( + optimized_ops::depthwise_conv::Fast3x3FilterKernelSupported( input_shape, filter_shape, stride_width, stride_height, dilation_width_factor, dilation_height_factor, pad_width, pad_height, depth_multiplier, output_shape, output_shift); @@ -127,7 +129,7 @@ inline void DispatchDepthwiseConv( << " output_height = " << output_shape.Dims(1); // Call kernel optimized for depthwise convolutions using 3x3 filters. - optimized_ops::DepthwiseConv3x3Filter( + optimized_ops::depthwise_conv::DepthwiseConv3x3Filter( params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, output_data); return; @@ -135,23 +137,23 @@ inline void DispatchDepthwiseConv( break; #endif } - case DepthwiseConvInvocation::kUseNeon3x3DotProduct: - case DepthwiseConvInvocation::kUseCModel3x3DotProduct: - case DepthwiseConvInvocation::kUseUnwound3x3DotProduct: - case DepthwiseConvInvocation::kUseIntrinsics3x3DotProduct: + case DepthwiseConvImplementation::kUseNeon3x3DotProduct: + case DepthwiseConvImplementation::kUseCModel3x3DotProduct: + case DepthwiseConvImplementation::kUseUnwound3x3DotProduct: + case DepthwiseConvImplementation::kUseIntrinsics3x3DotProduct: // TODO(b/118426582) Placeholder for future dispatches. 
break; - case DepthwiseConvInvocation::kUseGenericKernel: { - optimized_ops::DepthwiseConvGeneral(params, input_shape, input_data, - filter_shape, filter_data, bias_shape, - bias_data, output_shape, output_data); + case DepthwiseConvImplementation::kUseGenericKernel: { + optimized_ops::depthwise_conv::DepthwiseConvGeneral( + params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data); return; } - case DepthwiseConvInvocation::kNone: + case DepthwiseConvImplementation::kNone: default: break; } - EXPECT_EQ(test_param.forced_invocation, DepthwiseConvInvocation::kNone) + EXPECT_EQ(test_param.forced_invocation, DepthwiseConvImplementation::kNone) << "TODO(b/118426582) requested kernel was not invoked / available yet"; optimized_ops::DepthwiseConv(params, input_shape, input_data, filter_shape, filter_data, bias_shape, bias_data, output_shape, @@ -191,7 +193,7 @@ int TestOneDepthwiseConvWithGivenOutputShift( op_params.output_shift = -output_shift; switch (test_param.output_rounding) { case DepthwiseConvOutputRounding::kUpward: - reference_ops::DepthwiseConvBasicKernel< + reference_ops::depthwise_conv::DepthwiseConvBasicKernel< DepthwiseConvOutputRounding::kAwayFromZero>::Run(op_params, input_shape, input_data, @@ -449,7 +451,7 @@ bool TryTestOneDepthwiseConv3x3Filter( UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid; // Adjust for, or reject, special cases. - if (test_param.forced_invocation != DepthwiseConvInvocation::kNone) { + if (test_param.forced_invocation != DepthwiseConvImplementation::kNone) { // With stride == 2 and SAME, padding width and height are the left and top // padding amounts. When there is an even input dimension, padding + 1 is // required on the right / bottom. 
This is not handled by these kernels, so @@ -509,7 +511,7 @@ bool TryTestOneNeonDot3x3(const TestParam& test_param, dilation_width_factor, dilation_height_factor, padding_type); } -void TestOneDepthwiseConv(DepthwiseConvInvocation forced_invocation, +void TestOneDepthwiseConv(DepthwiseConvImplementation forced_invocation, DepthwiseConvOutputRounding output_rounding) { TestParam test_param; test_param.forced_invocation = forced_invocation; @@ -519,7 +521,7 @@ void TestOneDepthwiseConv(DepthwiseConvInvocation forced_invocation, } void TestOneDepthwiseConv3x3Filter( - DepthwiseConvInvocation forced_invocation, + DepthwiseConvImplementation forced_invocation, DepthwiseConvOutputRounding output_rounding) { TestParam test_param; test_param.forced_invocation = forced_invocation; @@ -537,7 +539,7 @@ void TestOneNeonDot3x3(const TestParam& test_param) { TEST(TestDepthwiseConv, TestDepthwiseConv) { const int kTestsToRun = 10 * 1000; for (int i = 0; i < kTestsToRun; i++) { - TestOneDepthwiseConv(DepthwiseConvInvocation::kNone, + TestOneDepthwiseConv(DepthwiseConvImplementation::kNone, DepthwiseConvOutputRounding::kAwayFromZero); } } @@ -546,7 +548,7 @@ TEST(TestDepthwiseConv, TestDepthwiseConv) { TEST(TestDepthwiseConv, TestGenericKernel) { const int kTestsToRun = 10 * 1000; for (int i = 0; i < kTestsToRun; i++) { - TestOneDepthwiseConv(DepthwiseConvInvocation::kUseGenericKernel, + TestOneDepthwiseConv(DepthwiseConvImplementation::kUseGenericKernel, DepthwiseConvOutputRounding::kAwayFromZero); } } @@ -554,7 +556,7 @@ TEST(TestDepthwiseConv, TestGenericKernel) { TEST(TestDepthwiseConv, TestKernel3x3Filter) { const int kTestsToRun = 1000; for (int i = 0; i < kTestsToRun; i++) { - TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kNone, + TestOneDepthwiseConv3x3Filter(DepthwiseConvImplementation::kNone, DepthwiseConvOutputRounding::kAwayFromZero); } } @@ -564,8 +566,9 @@ TEST(TestDepthwiseConv, TestKernel3x3Filter) { TEST(TestDepthwiseConv, TestGenericKernel3x3Filter) { 
const int kTestsToRun = 100; for (int i = 0; i < kTestsToRun; i++) { - TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kUseGenericKernel, - DepthwiseConvOutputRounding::kAwayFromZero); + TestOneDepthwiseConv3x3Filter( + DepthwiseConvImplementation::kUseGenericKernel, + DepthwiseConvOutputRounding::kAwayFromZero); } } @@ -573,7 +576,7 @@ TEST(TestDepthwiseConv, TestGenericKernel3x3Filter) { TEST(TestDepthwiseConv, TestNeon3x3Filter) { const int kTestsToRun = 3 * 1000; for (int i = 0; i < kTestsToRun; i++) { - TestOneDepthwiseConv3x3Filter(DepthwiseConvInvocation::kUseNeon3x3, + TestOneDepthwiseConv3x3Filter(DepthwiseConvImplementation::kUseNeon3x3, DepthwiseConvOutputRounding::kAwayFromZero); } } @@ -592,11 +595,11 @@ TEST_P(DepthwiseConvTest, NeonDot3x3) { INSTANTIATE_TEST_SUITE_P( Neon3x3Kernel, DepthwiseConvTest, testing::Combine( - Values(DepthwiseConvInvocation::kUseNeon3x3), // forced_invocation - Values(1000), // tests_to_run - Bool(), // test_stride - Values(false), // test_pad - Values(false), // test_depth_multiplier + Values(DepthwiseConvImplementation::kUseNeon3x3), // forced_invocation + Values(1000), // tests_to_run + Bool(), // test_stride + Values(false), // test_pad + Values(false), // test_depth_multiplier Values(DepthwiseConvOutputRounding::kAwayFromZero), // output_rounding Values(false) // loose_tolerance ), @@ -608,11 +611,11 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P( GenericKernel, DepthwiseConvTest, testing::Combine( - Values( - DepthwiseConvInvocation::kUseGenericKernel), // forced_invocation - Values(100), // tests_to_run - Bool(), // test_stride - Bool(), // test_pad + Values(DepthwiseConvImplementation:: + kUseGenericKernel), // forced_invocation + Values(100), // tests_to_run + Bool(), // test_stride + Bool(), // test_pad Bool(), // test_depth_multiplier Values(DepthwiseConvOutputRounding::kUpward), // output_rounding Values(false) // loose_tolerance diff --git 
a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h index 13629494bd..84d701676b 100644 --- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -24,6 +24,7 @@ limitations under the License. namespace tflite { namespace optimized_ops { +namespace depthwise_conv { // Implementation of quantized DepthwiseConv @@ -1946,6 +1947,8 @@ inline void DepthwiseConvGeneral( } } +} // namespace depthwise_conv + inline void DepthwiseConv( const DepthwiseParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& filter_shape, @@ -1980,23 +1983,23 @@ inline void DepthwiseConv( // Call kernel optimized for depthwise convolutions using 3x3 filters if // parameters are supported. - if (Fast3x3FilterKernelSupported( + if (depthwise_conv::Fast3x3FilterKernelSupported( input_shape, filter_shape, stride_width, stride_height, dilation_width_factor, dilation_height_factor, pad_width, pad_height, depth_multiplier, output_shape, output_shift)) { gemmlowp::ScopedProfilingLabel specialized_label("DepthwiseConv/8bit/3x3"); - DepthwiseConv3x3Filter(params, input_shape, input_data, filter_shape, - filter_data, bias_shape, bias_data, output_shape, - output_data); + depthwise_conv::DepthwiseConv3x3Filter( + params, input_shape, input_data, filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); return; } #endif gemmlowp::ScopedProfilingLabel specialized_label( "DepthwiseConv/8bit/General"); - DepthwiseConvGeneral(params, input_shape, input_data, filter_shape, - filter_data, bias_shape, bias_data, output_shape, - output_data); + depthwise_conv::DepthwiseConvGeneral(params, input_shape, input_data, + filter_shape, filter_data, bias_shape, + bias_data, output_shape, output_data); } } // namespace optimized_ops diff --git 
a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index b7993c3104..f43c2b33e1 100644 --- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -23,6 +23,10 @@ limitations under the License. namespace tflite { namespace optimized_ops { +namespace depthwise_conv { + +constexpr int kDepthwiseConvScratchWorkspaceSize = 10 * 10 * 64; +constexpr int kDepthwiseConvAdjustedBiasLimit = 256; // See CategorizeDotProductKernel for definitive taxonomy. enum class DotProduct3x3KernelType { @@ -61,13 +65,14 @@ inline DotProduct3x3KernelType CategorizeDotProductKernel( } } +#define STR(s) STR_UNEXPANDED(s) +#define STR_UNEXPANDED(s) #s + // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on // Jetson TX-2. This compiler does not support the offsetof() macro. #if defined(__aarch64__) && !defined(GOOGLE_L4T) #include -#define DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE 10 * 10 * 64 - // Encapsulates constant parameters used in DepthwiseConv. // 64-bit is used for types that will be added to 64-bit addresses in asm. struct DepthwiseConvParams { @@ -91,9 +96,6 @@ struct DepthwiseConvParams { int32 output_height; }; -#define STR(s) STR_UNEXPANDED(s) -#define STR_UNEXPANDED(s) #s - // Represents the number of bytes offset from the start of the // DepthwiseConvParams struct. This is used in the asm to load parameters. // Keep these values in sync with the static_asserts below. @@ -168,7 +170,46 @@ static_assert(offsetof(DepthwiseConvParams, output_width) == static_assert(offsetof(DepthwiseConvParams, output_height) == OFFSET_OUTPUT_HEIGHT, ""); +#endif +// Encapsulates constant parameters used in DepthwiseConv using dot-product ops. +// 64-bit is used for types that will be added to 64-bit addresses in asm. 
+// +// This structure is specifically designed for use in asm. +struct DepthwiseConvDotProdParams { + int64_t input_depth; + int64_t output_depth; + int32 workspace_height_stride; + int32 input_width_overall_micro_repeats; + int32 input_width_micro_repeats; + int32 depth_micro_repeats; + int32 inbound_block_height; + int32 residual_width; + int32 input_height_stride; + int32 stride; + int32 output_width_overall_micro_repeats; + int32 output_width_micro_repeats; + int32 output_residual_width; + int32 output_height_stride; + int32 bias_increment; + int32 padding_left; + int32 padding_right; + int32 padding_top; + int32 padding_bottom; + int32 height_macro_count; + int32 width_macro_count; + int32 outbound_block_height; + int32 workspace_width_micro_repeats; + int32 input_offset; + int32 output_offset; + int32 output_multiplier; + int32 output_shift; + int32 quantized_activation_min; + int32 quantized_activation_max; + int32 four_over_stride; +}; + +#if defined(__aarch64__) && !defined(GOOGLE_L4T) template struct DepthwiseConvWindow {}; @@ -2964,8 +3005,6 @@ struct DepthwiseConvPartial { #undef OFFSET_INPUT_HEIGHT #undef OFFSET_OUTPUT_WIDTH #undef OFFSET_OUTPUT_HEIGHT -#undef STR -#undef STR_UNEXPANDED // Copies a subset of the input designated by |input_ptr| into |output_ptr| // with the specified output dimensions. Supports output depths of 64 only as @@ -3048,7 +3087,7 @@ struct DepthwiseConvMultiRow { get_shuffle_input_size(kStrideWidth, shuffle_params.output_width)); TFLITE_DCHECK(64 * shuffle_params.input_width * shuffle_params.input_height <= - DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE); + kDepthwiseConvScratchWorkspaceSize); int32 out_x = start_x; @@ -3376,7 +3415,7 @@ inline void DepthwiseConv3x3Filter( // allocated on the stack. Eventually we will want to move it to the heap // and have it allocated outside of this function, like the im2col_array // used in gemmlowp. 
- uint8 shuffle_workspace[DEPTHWISECONV_SHUFFLE_WORKSPACE_SIZE]; + uint8 shuffle_workspace[kDepthwiseConvScratchWorkspaceSize]; for (int32 b = 0; b < batches; ++b) { const uint8* input_ptr = input_data + b * input_batch_size; @@ -3455,9 +3494,12 @@ inline void DepthwiseConv3x3Filter( } } } - #endif // __aarch64__ +#undef STR +#undef STR_UNEXPANDED + +} // namespace depthwise_conv } // namespace optimized_ops } // namespace tflite diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h new file mode 100644 index 0000000000..148001f444 --- /dev/null +++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h @@ -0,0 +1,711 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_TRANSITIONAL_H_ +#define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_TRANSITIONAL_H_ + +// This file provides kernel implementations that are not used in shipped +// inference code, but rather (a) show how model C++ code is designed and then +// transformed into asm code, and (b) aid with maintenance and later development +// of variations. 
Many projects (even including, say, the classic NAG libraries) +// develop highly optimized code, but do not maintain intermediate versions. +// Often the result is incomprehensible final-version code. + +#include + +#include "fixedpoint/fixedpoint.h" +#include "tensorflow/lite/kernels/internal/common.h" +#include "tensorflow/lite/kernels/internal/compatibility.h" +#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h" +#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h" +#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h" +#include "tensorflow/lite/kernels/internal/types.h" + +namespace tflite { +namespace optimized_ops { +namespace depthwise_conv { + +// Permute filter data, and adjust bias data to account for symmetric input +// offset. Details are provided in the implementation of the +// kUseCModel3x3DotProduct version. +// +// See the comments preceding DepthwiseConvDotProduct3x3() for further notes. +template +struct ProcessPerDepth { + // Routine is contained in a static Run() method. No default template version + // is supplied, so that all implementations are deliberate choices of template + // specialization. + // + // Note that the signature of the Run() method will be designed for the asm + // implementation rather than conforming to style. +}; + +template <> +struct ProcessPerDepth {}; + +// Copy a macro block of data from the input buffer into the workspace, +// permuting data within each micro block. +// +// (a) Copy a macro block of data, padding as required along the width and +// height. +// (b) Transpose the data within each micro block. +// +// See the comments preceding DepthwiseConvDotProduct3x3() for further notes. +template +struct PackMacroBlock { + // Routine is contained in a static Run() method. No default template version + // is supplied, so that all implementations are deliberate choices of template + // specialization. 
+ // + // Note that the signature of the Run() method will be designed for the asm + // implementation rather than conforming to style. +}; + +// TODO(b/118877434) Placeholder, to be implemented in subsequent CL. +template +struct PackMacroBlock { + static inline void Run(int32 height_block_number, int32 width_block_number, + const uint8* input_block_data, + int8* scratch_block_data, + const DepthwiseConvDotProdParams* function_params) { + TFLITE_DCHECK(false); + return; + } +}; + +// TODO(b/118877434) Placeholder, to be implemented in subsequent CL. +template +struct PackMacroBlock { + static inline void Run(int32 height_block_number, int32 width_block_number, + const uint8* input_block_data, + int8* scratch_block_data, + const DepthwiseConvDotProdParams* function_params) { + TFLITE_DCHECK(false); + return; + } +}; + +// Apply filter to macro block of input data and store results. Details are +// provided in the implementation of the kUseCModel3x3DotProduct version. +// +// Parameters for repeats and residual sizes are in terms of outputs. +// +// See the comments preceding DepthwiseConvDotProduct3x3() for further notes. +template +struct KernelMacroBlock { + // Routine is contained in a static Run() method. No default template version + // is supplied, so that all implementations are deliberate choices of template + // specialization. + // + // Note that the signature of the Run() method will be designed for the asm + // implementation rather than conforming to style. +}; + +// TODO(b/118877434) Placeholder, to be implemented in subsequent CL. +template +struct KernelMacroBlock { + static inline void Run(const int8* scratch_block_data, + const int8* filter_workspace, const int32* bias_data, + uint8* output_block_data, + const DepthwiseConvDotProdParams* function_params) { + TFLITE_DCHECK(false); + return; + } +}; + +// TODO(b/118877434) Placeholder, to be implemented in subsequent CL. 
+template +struct KernelMacroBlock { + static inline void Run(const int8* scratch_block_data, + const int8* filter_workspace, const int32* bias_data, + uint8* output_block_data, + const DepthwiseConvDotProdParams* function_params) { + TFLITE_DCHECK(false); + return; + } +}; + +// Top-level implementation function for 3x3 depthwise convolution using +// NEON dot-product instructions. +// +// MACRO & MICRO BLOCKS +// +// The task is divided into macro blocks. Data is copied first into a macro +// block in a workspace. This has two purposes: (a) bringing data into +// cache, and (b) permuting data so that it can be used much more easily in +// a dot-product filter. +// +// When there is no depth multiplication: +// +// The permutations required for dot-products are local, within 4 data points +// down the depth and 4 across the width. We want to pull in input data at least +// 8-bytes at a time, down the depth, and so we divide the macro blocks into +// micro blocks of shape 1x4x8 (height, width, depth) and further divide the +// micro blocks into sub-blocks with shape (1x4x4). +// +// Each macro-block is constructed from micro-blocks that are internally +// rearranged during loading into the macro-block workspace. +// +// In other words, the micro-block shape is +// {1, 1, 4, 8} +// Each macro block is typically shape +// {1, height_block_size, 4 * workspace_width_micro_repeats, 64} +// and workspace_width_micro_repeats is chosen so it fits into the +// workspace. +// +// However, if depth < 64, we decrease the macro block depth, enabling us to +// increase the macro-block width. +// +// When there is depth multiplication: +// +// We require input-depth = 1 and exploit that instead. Note that output data +// is still full-depth, *as is the filter and bias data after certain +// adjustments*, and so the filter stage in this case still proceeds in +// terms of sub-blocks. +// +// The Magic of these numbers: +// 4 is the number of input elements used in each dot-product.
+// 8 is the number of inputs we load at a time into a register. +// 64 is min amount of data to be loaded in a stretch (when possible). +// +// FILTER DATA PREPARATION +// +// Filter data needs to be permuted in a fashion like that of input data, and +// this is done in a preprocessing stage. In addition, this stage extends the +// filter in the direction of width from 3 to 4. The extra filter taps are set +// to zero so that input data does not have to be zeroed before applying +// dot-products. +// +// OVERALL COUNTS: HANDLING TRAILING ITERATION +// +// Often it is necessary to handle the last iteration in a loop differently, +// generally because the final item is shorter. The logic to detect the +// special case can be a bit expensive. We use a scheme in which there are +// two counts, in a pattern like xxx_yyy_repeats and +// xxx_overall_yyy_repeats. The first gives the count of "normal" +// iterations. The loop iterates over the second count, and the induction +// variable is checked to see if it reaches xxx_yyy_repeats. If there is no +// special trailing iteration, xxx_yyy_repeats = xxx_overall_yyy_repeats, +// and the special code is not executed. +// +// Example: +// Suppose that we characterize a size s as +// f(s) -> (block-4-repetitions, remainder, overall_repetitions): +// f(11) -> (2, 3, 3) +// f(12) -> (3, 0, 3) +// f(13) -> (3, 1, 4) +// +// POINTING OUTSIDE OF INPUT ARRAY. +// +// When there is padding, the input data pointer passed to the fill routines +// points outside of the input array and into a kind-of virtual padded +// margin. It turns out that this simplifies the code and removes +// conditional statements. It is hard to explain why without comparing two +// versions of the code. In summary, this way the adjustment into the margin +// can be made unconditionally, and the correction back into the input array +// is done where there is a conditional already. 
+// +// OVERLAP +// +// Since this is *depthwise* conv, neither the batch nor the depth have overlap. +// The height and width overlap by (filter_size - 1). Thus some data is used +// twice on the borders of macro blocks. +// +template +inline void DepthwiseConvDotProduct3x3( + const DepthwiseParams& params, const RuntimeShape& input_shape, + const uint8* input_data, const RuntimeShape& filter_shape, + const uint8* filter_data, const RuntimeShape& bias_shape, + const int32* bias_data, const RuntimeShape& output_shape, + uint8* output_data) { + // Check kernel restrictions. + constexpr int filter_size = 3; + constexpr int kSymmetricZeroPoint = 128; + constexpr int kMaxStride = 2; + constexpr int kMaxPadding = 1; + TFLITE_DCHECK_EQ(params.weights_offset, -kSymmetricZeroPoint); + TFLITE_DCHECK_LE(params.stride_width, kMaxStride); + TFLITE_DCHECK_EQ(params.stride_height, params.stride_width); + TFLITE_DCHECK_EQ(params.dilation_width_factor, 1); + TFLITE_DCHECK_EQ(params.dilation_height_factor, 1); + TFLITE_DCHECK_LE(params.padding_values.width, kMaxPadding); + TFLITE_DCHECK_LE(params.padding_values.height, kMaxPadding); + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + TFLITE_DCHECK_LE(params.quantized_activation_min, + params.quantized_activation_max); + + // Key kernel parameters (along with padding handled later). + const int stride = params.stride_width; + const int depth_multiplier = params.depth_multiplier; + const bool has_depth_multiplication = depth_multiplier > 1; + + // Extract task dimensions.
+ const int input_depth = input_shape.Dims(3); + const int output_depth = MatchingDim(filter_shape, 3, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + TFLITE_DCHECK(!has_depth_multiplication || input_depth == 1); + TFLITE_DCHECK(has_depth_multiplication || input_depth == output_depth); + TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_depth); + TFLITE_DCHECK_EQ(input_depth * depth_multiplier, output_depth); + TFLITE_DCHECK_EQ(MatchingDim(filter_shape, 1, filter_shape, 2), filter_size); + + // Return now if nothing to do. + if (output_width == 0 || output_height == 0) { + return; + } + + // Kernel parameter structure: set basic fields. + // + // In asm it is easier to pass a structure than more than, say, 8 parameters. + DepthwiseConvDotProdParams function_params; + function_params.input_depth = input_depth; + function_params.output_depth = output_depth; + function_params.input_offset = params.input_offset; + function_params.output_offset = params.output_offset; + function_params.output_multiplier = params.output_multiplier; + function_params.output_shift = params.output_shift; + function_params.quantized_activation_min = params.quantized_activation_min; + function_params.quantized_activation_max = params.quantized_activation_max; + function_params.stride = stride; + + // Handle inbound bias data. + // + // Note that this data is adjusted in a per-depth process before the main + // filters. The adjustment accounts for a non-symmetric input offset. + // + // Kernel subroutines need to be able to operate consistently on a bias + // array. Where there is no bias, we provide one filled with zeros.
+ constexpr int kMinBiasLoad = 8; + int32 zero_bias_data[kMinBiasLoad]; + if (bias_data) { + function_params.bias_increment = 4; + } else { + memset(zero_bias_data, 0, sizeof(zero_bias_data)); + bias_data = &zero_bias_data[0]; + function_params.bias_increment = 0; + } + TFLITE_DCHECK_LE(2 * function_params.bias_increment, kMinBiasLoad); + + // Process padding. + // + // Whether "correct" or not, this matches ComputeConvSizes. When there is + // stride > 1 there can be padding on the bottom or top, and therefore + // we need to consider padding. This is true even if one or other of the + // padding_values is 0. + const int padded_width = (output_width - 1) * stride + filter_size; + { + const int padding_left = params.padding_values.width; + // Right padding would be -1 if discarding input because of stride. + const int padding_right = + std::max(padded_width - input_width - padding_left, 0); + const int padding_top = params.padding_values.height; + const int padded_height = (output_height - 1) * stride + filter_size; + const int padding_bottom = + std::max(padded_height - input_height - padding_top, 0); + + function_params.padding_left = padding_left; + function_params.padding_right = padding_right; + function_params.padding_top = padding_top; + function_params.padding_bottom = padding_bottom; + + TFLITE_DCHECK_LE(padding_left, padding_right); + TFLITE_DCHECK_LE(padding_top, padding_bottom); + } + // When stride == 1 left or top padding may only be non-zero. + // This is when padding is specified but not needed on a trailing dimension. + // When stride == 2 right or bottom padding may only be non-zero. + // This is a result of the details of the padding calculations. + const bool padding_required = + params.padding_type == tflite::PaddingType::kSame || + function_params.padding_right > 0 || function_params.padding_bottom > 0; + + // Choose parameter-specific kernel subroutines. + // + // The main part of the kernel has two stages. 
First, a temporary workspace is + // filled with padded and permuted data. Second, the filter is applied to the + // workspace data to generate output. + // + // The workspace fill stage handles padding so that the filter stage does not + // need to account for it. The workspace fill stage does not need to + // understand striding, and implicitly handles striding through the parameters + // that it is given. + using pack_macro_block_func_t = decltype( + &PackMacroBlock::Run); + using kernel_macro_block_func_t = decltype( + &KernelMacroBlock::Run); + pack_macro_block_func_t pack_macro_block_func; + kernel_macro_block_func_t kernel_macro_block_func; + { + if (has_depth_multiplication) { + if (padding_required) { + pack_macro_block_func = + PackMacroBlock::Run; + } else { + pack_macro_block_func = + PackMacroBlock::Run; + } + if (stride == 1) { + kernel_macro_block_func = + KernelMacroBlock::Run; + } else { + kernel_macro_block_func = + KernelMacroBlock::Run; + } + } else { + if (padding_required) { + pack_macro_block_func = + PackMacroBlock::Run; + } else { + pack_macro_block_func = + PackMacroBlock::Run; + } + if (stride == 1) { + kernel_macro_block_func = KernelMacroBlock< + implementation, DepthwiseConvDepthMultiplication::kNoMultiplication, + /*stride=*/1>::Run; + } else { + kernel_macro_block_func = KernelMacroBlock< + implementation, DepthwiseConvDepthMultiplication::kNoMultiplication, + /*stride=*/2>::Run; + } + } + } + + // Stride-only variables. + // + // stride == 1 ? 4 : 2: + const int output_height_per_macro = 6 - 2 * stride; + // output_height_per_macro * stride: + constexpr int input_height_per_macro = 4; + // Number of rows per micro block (= rows per macro block) is + // (output_height_per_macro - 1) * stride + 1 + (filter_size - 1) + // = stride == 1 ? 3 + filter_size : 2 + filter_size: + const int height_block_size = 4 + filter_size - stride; + const int input_height_overlap = filter_size - stride; + // stride == 1 ? 
4 : 2: + function_params.four_over_stride = output_height_per_macro; + + TFLITE_DCHECK_EQ(stride * function_params.four_over_stride, 4); + TFLITE_DCHECK_EQ(height_block_size, + input_height_per_macro + input_height_overlap); + + // Create workspaces. + // + // Filter workspace is for shuffle: only first depth/8 is used. + // indexed as [depth/8][sub-block][height][depth][width]. + TFLITE_DCHECK_LE(output_depth, kDepthwiseConvAdjustedBiasLimit); + TFLITE_DCHECK_EQ(kDepthwiseConvAdjustedBiasLimit % 8, 0); + int8 macroblock_workspace[kDepthwiseConvScratchWorkspaceSize]; + int32 adjusted_bias_data[kDepthwiseConvAdjustedBiasLimit]; + int8 filter_workspace[kDepthwiseConvAdjustedBiasLimit >> 3][3][2][4][4]; + + // Output depth characterization. + // + const int depth_macro_count = output_depth / 64; + const int depth_overall_macro_count = (output_depth + 63) / 64; + // Number of micro blocks down the depth in a final incomplete macro block. + const int depth_trailing_micro_repeats = output_depth / 8 % 8; + // The output_depth may not have a remainder: it must be a multiple of 8. + TFLITE_DCHECK_EQ(output_depth, + 64 * depth_macro_count + 8 * depth_trailing_micro_repeats); + + // Characterize the first macro block depth, the largest. + // + // We base treatment of the width on the trailing macro block if there are + // no full blocks, in order to do more work together (that is, increase + // workspace_width_micro_repeats when largest_macro_depth < 64). + const int largest_macro_depth = + has_depth_multiplication + ? 1 + : (depth_macro_count > 0 ? 64 : 8 * depth_trailing_micro_repeats); + + // Characterize width, consumption of input and generation of output. + // + // In the case of depth multiplication, we ensure that some of the workspace + // at the end remains unused. This enables the filter routines to load the + // "next" data, of at least 16 bytes, even when at the end of the workspace. + // It is relatively expensive to detect the end micro block. 
It is also very + // difficult to test for (to trigger) erroneous reads (past end of array) in + // the depth multiplication case. + int workspace_width_micro_repeats = + (has_depth_multiplication ? kDepthwiseConvScratchWorkspaceSize - 16 + : kDepthwiseConvScratchWorkspaceSize) / + (4 * largest_macro_depth * height_block_size); + // When there is no depth multiplication, the workspace depth is a multiple of + // 8, which ensures that workspace rows are 16-byte aligned. (Actually 32, + // because of the micro width of 4.) This is not necessarily the case under + // depth multiplication, so we adjust now to impose this restriction. + if (has_depth_multiplication) { + workspace_width_micro_repeats = (workspace_width_micro_repeats / 4) * 4; + } + TFLITE_DCHECK_EQ((workspace_width_micro_repeats * largest_macro_depth) % 4, + 0); + // Discount 1 of the micro-block repeats in each macro block to account for + // overlap. + const int consumed_width_per_macro_block = + 4 * (workspace_width_micro_repeats - 1); + const int output_width_per_macro_block = + function_params.four_over_stride * (workspace_width_micro_repeats - 1); + TFLITE_DCHECK_GT(workspace_width_micro_repeats, 1); + TFLITE_DCHECK_EQ(output_width_per_macro_block * stride, + consumed_width_per_macro_block); + + // Width repetitions and residuals. + // + // Use of the workspace is characterized primarily in terms of *padded input*. + // Striding only matters in a few places. + // + // Simplifications: We require that there always be at least one full + // micro-block across the width. Since the maximum padding is 1, the trailing + // padding cannot span two micro blocks. + const int residual_micro_width = padded_width % 4; + // We base the count of macro blocks on the amount of padded input data each + // one consumes.
+ int width_overall_macro_count = (padded_width - residual_micro_width + + consumed_width_per_macro_block - 1) / + consumed_width_per_macro_block; + // Recall that we left a micro block at the end of each macro block for use as + // overlap. There is a special case in which we can use one fewer macro + // blocks, with the last one consuming extra input. (But not if the + // calculation thinks that we can use zero blocks.) + if (padded_width <= + ((width_overall_macro_count - 1) * consumed_width_per_macro_block + 4)) { + width_overall_macro_count -= 1; + } + width_overall_macro_count = std::max(width_overall_macro_count, 1); + // We always have to treat the final macro block along width as trailing, + // because even if it is full in terms of padded input, it will be incomplete + // in terms of output. + const int width_macro_count = width_overall_macro_count - 1; + // Micro blocks are traversed in terms of input in fill routines. + const int width_trailing_micro_repeats = + (padded_width - consumed_width_per_macro_block * width_macro_count) / 4; + const int width_overall_trailing_micro_repeats = + (padded_width - consumed_width_per_macro_block * width_macro_count + 3) / + 4; + // Micro blocks are traversed in terms of output in filtering routines. + const int residual_output_micro_width = + (output_width - 1) % function_params.four_over_stride + 1; + const int output_width_trailing_micro_repeats = + residual_micro_width > (filter_size - 1) + ? width_trailing_micro_repeats + : width_trailing_micro_repeats - 1; + // Check results. + TFLITE_DCHECK_GT(width_overall_trailing_micro_repeats, 0); + TFLITE_DCHECK_EQ(padded_width, + residual_micro_width + + consumed_width_per_macro_block * width_macro_count + + 4 * width_trailing_micro_repeats); + TFLITE_DCHECK_LE(width_overall_macro_count, width_macro_count + 1); + TFLITE_DCHECK_GE(width_overall_macro_count, width_macro_count); + + // Height repetitions and residuals. 
+ // + const int height_macro_count = output_height / output_height_per_macro; + const int residual_output_height = output_height % output_height_per_macro; + const int height_overall_macro_count = + (output_height + output_height_per_macro - 1) / output_height_per_macro; + TFLITE_DCHECK_EQ( + output_height, + residual_output_height + output_height_per_macro * height_macro_count); + TFLITE_DCHECK_LE(height_overall_macro_count, height_macro_count + 1); + TFLITE_DCHECK_GE(height_overall_macro_count, height_macro_count); + + // Data strides. + // + const int input_height_stride = input_width * input_depth; + const int output_height_stride = output_width * output_depth; + const int input_batch_stride = input_height_stride * input_height; + const int output_batch_stride = output_height_stride * output_height; + const int input_depth_macro_stride = has_depth_multiplication ? 0 : 64; + const int input_width_macro_stride = + input_depth * consumed_width_per_macro_block; + const int output_width_macro_stride = + output_depth * output_width_per_macro_block; + + // Store parameters that do not vary across macro blocks. + // + function_params.workspace_width_micro_repeats = workspace_width_micro_repeats; + function_params.height_macro_count = height_overall_macro_count; + function_params.width_macro_count = width_overall_macro_count; + function_params.input_height_stride = input_height_stride; + function_params.output_height_stride = output_height_stride; + function_params.residual_width = residual_micro_width; + + // Preprocess filter and bias data. + // + ProcessPerDepth::Run(filter_data, bias_data, + filter_workspace[0][0][0][0], + adjusted_bias_data, &function_params); + function_params.bias_increment = 4; // Adjusted bias data always spans depth. + + // Main process. + // + // Most kernels are nested batch-height-width-depth. Here we proceed over + // macro blocks batch-width-depth-height. 
+ // + // Example of handling of trailing iteration: when there is trailing depth, + // depth_overall_macro_count = depth_macro_count + 1, so we can adjust the + // dimensions for trailing macro blocks by looking for + // j_depth == depth_macro_count. + for (int b = 0; b < batches; ++b) { + for (int k_width = 0; k_width < width_overall_macro_count; ++k_width) { + // Figure out the work to be done for this macro block. If it trails in + // any dimension, the work in that dimension is adjusted. + // The work to be done across widths has 3 cases: + // (a) A full macro block, + // (b) Partial terminal macro block, with input and output ending in + // same micro block, and + // (c) Partial terminal macro block, with output corresponding to one + // fewer micro blocks, because filter extends across micro-block + // boundary. + if (k_width != width_macro_count) { + function_params.output_residual_width = 0; + function_params.input_width_micro_repeats = + workspace_width_micro_repeats; + function_params.input_width_overall_micro_repeats = + workspace_width_micro_repeats; + function_params.output_width_micro_repeats = + workspace_width_micro_repeats - 1; + } else { + function_params.output_residual_width = residual_output_micro_width; + function_params.input_width_micro_repeats = + width_trailing_micro_repeats; + function_params.input_width_overall_micro_repeats = + width_overall_trailing_micro_repeats; + function_params.output_width_micro_repeats = + output_width_trailing_micro_repeats; + } + function_params.output_width_overall_micro_repeats = + function_params.output_residual_width == 0 + ? 
function_params.output_width_micro_repeats + : function_params.output_width_micro_repeats + 1; + + for (int j_depth = 0; j_depth < depth_overall_macro_count; ++j_depth) { + const uint8* input_data_block = + input_data + b * input_batch_stride + + j_depth * input_depth_macro_stride + + k_width * input_width_macro_stride - + function_params.padding_left * input_depth - + function_params.padding_top * input_height_stride; + uint8* output_data_block = output_data + b * output_batch_stride + + j_depth * 64 + + k_width * output_width_macro_stride; + + function_params.depth_micro_repeats = + j_depth == depth_macro_count ? depth_trailing_micro_repeats : 8; + // Under depth multiplication the workspace_height_stride does not have + // to depend on input_width_overall_micro_repeats, but this improves the + // compactness of workspace use. + const int workspace_height_stride = + has_depth_multiplication + ? 16 * ((function_params.input_width_overall_micro_repeats + + 3) >> + 2) + : 4 * function_params.input_width_overall_micro_repeats * 8 * + function_params.depth_micro_repeats; + TFLITE_DCHECK_EQ(workspace_height_stride % 16, 0); + function_params.workspace_height_stride = workspace_height_stride; + + // For the first macro block for output rows we fill in the first few + // rows. After this we will copy them (see below in loop.) 
+ function_params.inbound_block_height = input_height_overlap; + pack_macro_block_func(-1, k_width, input_data_block, + macroblock_workspace, &function_params); + input_data_block += input_height_stride * input_height_overlap; + + for (int i_height = 0; i_height < height_overall_macro_count; + ++i_height) { + if (i_height != height_macro_count) { + function_params.inbound_block_height = input_height_per_macro; + function_params.outbound_block_height = output_height_per_macro; + } else { + function_params.inbound_block_height = + residual_output_height * stride; + function_params.outbound_block_height = residual_output_height; + } + TFLITE_DCHECK_LT(i_height * output_height_per_macro, output_height); + TFLITE_DCHECK_LT(i_height * input_height_per_macro, input_height); + TFLITE_DCHECK_LT(k_width * output_width_per_macro_block, + output_width); + TFLITE_DCHECK_LT(k_width * consumed_width_per_macro_block, + input_width); + + // Macro blocks overlap by input_height_overlap rows, so we copy + // those instead of filling in afresh. The first macro block across + // output rows was filled in outside of the loop (above). 
+ if (i_height > 0) { + memcpy(macroblock_workspace, + macroblock_workspace + + input_height_per_macro * workspace_height_stride, + input_height_overlap * workspace_height_stride); + } + + pack_macro_block_func( + i_height, k_width, input_data_block, + macroblock_workspace + + input_height_overlap * workspace_height_stride, + &function_params); + + kernel_macro_block_func(macroblock_workspace, + filter_workspace[8 * j_depth][0][0][0], + adjusted_bias_data + 64 * j_depth, + output_data_block, &function_params); + + input_data_block += input_height_stride * input_height_per_macro; + output_data_block += output_height_stride * output_height_per_macro; + } + } + } + } +} + +} // namespace depthwise_conv +} // namespace optimized_ops +} // namespace tflite + +#endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_TRANSITIONAL_H_ diff --git a/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h index 7cc5679dcb..c38f37416d 100644 --- a/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h +++ b/tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h @@ -27,7 +27,7 @@ namespace tflite { // Used in tests and template parameters to control which version of depthwise // convolution is called. Primarily for reference code, and specializations // forced in tests. -enum class DepthwiseConvInvocation { +enum class DepthwiseConvImplementation { // Run all tests against kUseStandardEntry even if also testing another // kernel, since we need to be sure that the main DepthwiseConv() function in // optimized_ops.h dispatches to a correctly-executing kernel. 
@@ -59,6 +59,7 @@ enum class DepthwiseConvDepthMultiplication { }; namespace reference_ops { +namespace depthwise_conv { template inline int32 DepthwiseConvRound(int32 x, int32 quantized_multiplier, @@ -172,13 +173,15 @@ struct DepthwiseConvBasicKernel { } }; +} // namespace depthwise_conv + inline void DepthwiseConv( const DepthwiseParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& filter_shape, const uint8* filter_data, const RuntimeShape& bias_shape, const int32* bias_data, const RuntimeShape& output_shape, uint8* output_data) { - return DepthwiseConvBasicKernel< + return depthwise_conv::DepthwiseConvBasicKernel< DepthwiseConvOutputRounding::kAwayFromZero>::Run(params, input_shape, input_data, filter_shape, filter_data, bias_shape, -- GitLab From 40c7e8e319bc4384d67da4fd1be37960b0e8d565 Mon Sep 17 00:00:00 2001 From: Katherine Wu Date: Thu, 14 Feb 2019 14:44:52 -0800 Subject: [PATCH 161/351] Unprotect SignatureDef default input names. PiperOrigin-RevId: 234033427 --- .../python/saved_model/model_utils/export_utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/saved_model/model_utils/export_utils.py b/tensorflow/python/saved_model/model_utils/export_utils.py index e9f6f894c2..adb6bf2667 100644 --- a/tensorflow/python/saved_model/model_utils/export_utils.py +++ b/tensorflow/python/saved_model/model_utils/export_utils.py @@ -51,9 +51,11 @@ SIGNATURE_KEY_MAP = mode_keys.ModeKeyMap(**{ ModeKeys.TRAIN: signature_constants.DEFAULT_TRAIN_SIGNATURE_DEF_KEY, ModeKeys.TEST: signature_constants.DEFAULT_EVAL_SIGNATURE_DEF_KEY}) -_SINGLE_FEATURE_DEFAULT_NAME = 'feature' -_SINGLE_RECEIVER_DEFAULT_NAME = 'input' -_SINGLE_LABEL_DEFAULT_NAME = 'label' +# Default names used in the SignatureDef input map, which maps strings to +# TensorInfo protos. 
+SINGLE_FEATURE_DEFAULT_NAME = 'feature' +SINGLE_RECEIVER_DEFAULT_NAME = 'input' +SINGLE_LABEL_DEFAULT_NAME = 'label' ### Below utilities are specific to SavedModel exports. @@ -89,7 +91,7 @@ def build_all_signature_defs(receiver_tensors, ValueError: if export_outputs is not a dict """ if not isinstance(receiver_tensors, dict): - receiver_tensors = {_SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors} + receiver_tensors = {SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors} if export_outputs is None or not isinstance(export_outputs, dict): raise ValueError('export_outputs must be a dict and not' '{}'.format(type(export_outputs))) @@ -109,7 +111,7 @@ def build_all_signature_defs(receiver_tensors, six.iteritems(receiver_tensors_alternatives)): if not isinstance(receiver_tensors_alt, dict): receiver_tensors_alt = { - _SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors_alt + SINGLE_RECEIVER_DEFAULT_NAME: receiver_tensors_alt } for output_key, export_output in export_outputs.items(): signature_name = '{}:{}'.format(receiver_name or 'None', output_key or -- GitLab From 68caba625c84605c86f84b976aed0d0fffdf85d7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 15:38:44 -0800 Subject: [PATCH 162/351] Fix a copy.bara issue. 
PiperOrigin-RevId: 234043070 --- tensorflow/core/profiler/rpc/client/capture_profile.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.cc b/tensorflow/core/profiler/rpc/client/capture_profile.cc index 0a3dbeaef6..a543111d9e 100644 --- a/tensorflow/core/profiler/rpc/client/capture_profile.cc +++ b/tensorflow/core/profiler/rpc/client/capture_profile.cc @@ -34,9 +34,6 @@ namespace tensorflow { namespace profiler { namespace client { -using ::tensorflow::grpc::TPUProfileAnalysis; -using ::tensorflow::grpc::TPUProfiler; - constexpr uint64 kMaxEvents = 1000000; string GetCurrentTimeStampAsString() { -- GitLab From 9153bf73e244a5b24646aaa54f94d6d11601e0d5 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Thu, 14 Feb 2019 16:07:50 -0800 Subject: [PATCH 163/351] Re-enable kmeans test. PiperOrigin-RevId: 234047954 --- tensorflow/contrib/factorization/BUILD | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index 48a6ef4dca..448d35d5e9 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -203,10 +203,6 @@ py_test( srcs = ["python/ops/kmeans_test.py"], shard_count = 4, srcs_version = "PY2AND3", - tags = [ - "nomac", # b/73741358 - "notsan", # b/67512932 - ], deps = [ ":factorization_py", ":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO", -- GitLab From 7328add9da36e8af75285b167193f35e963f5595 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 16:08:07 -0800 Subject: [PATCH 164/351] This fixes the package name issue between 1.0 and 2.0_preview as well as gpu and cpu package names. 
PiperOrigin-RevId: 234048003 --- tensorflow/tools/ci_build/copy_binary.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py index aec1d7e28d..856d64eb82 100755 --- a/tensorflow/tools/ci_build/copy_binary.py +++ b/tensorflow/tools/ci_build/copy_binary.py @@ -32,7 +32,7 @@ import shutil import tempfile import zipfile -TF_NIGHTLY_REGEX = (r"(.+)tf_nightly(|_gpu)(?:_2.0_preview?)?-(\d\.[\d]{1,2}" +TF_NIGHTLY_REGEX = (r"(.+)(tf_nightly.*)-(\d\.[\d]{1,2}" r"\.\d.dev[\d]{0,8})-(.+)\.whl") BINARY_STRING_TEMPLATE = "%s-%s-%s.whl" @@ -43,7 +43,7 @@ def check_existence(filename): raise RuntimeError("%s not found." % filename) -def copy_binary(directory, origin_tag, new_tag, version, gpu=False): +def copy_binary(directory, origin_tag, new_tag, version, package): """Rename and copy binaries for different python versions. Arguments: @@ -51,14 +51,10 @@ def copy_binary(directory, origin_tag, new_tag, version, gpu=False): origin_tag: str of the old python version tag new_tag: str of the new tag version: the version of the package - gpu: bool if its a gpu build or not + package: str, name of the package """ print("Rename and copy binaries with %s to %s." 
% (origin_tag, new_tag)) - if gpu: - package = "tf_nightly_gpu" - else: - package = "tf_nightly" origin_binary = BINARY_STRING_TEMPLATE % (package, version, origin_tag) new_binary = BINARY_STRING_TEMPLATE % (package, version, new_tag) zip_ref = zipfile.ZipFile(os.path.join(directory, origin_binary), "r") @@ -120,7 +116,7 @@ def main(): check_existence(args.filename) regex_groups = re.search(TF_NIGHTLY_REGEX, args.filename) directory = regex_groups.group(1) - gpu = regex_groups.group(2) + package = regex_groups.group(2) version = regex_groups.group(3) origin_tag = regex_groups.group(4) old_py_ver = re.search(r"(cp\d\d)", origin_tag).group(1) @@ -129,7 +125,7 @@ def main(): new_tag = origin_tag.replace(old_py_ver, "cp" + args.new_py_ver) # Copy the binary with the info we have - copy_binary(directory, origin_tag, new_tag, version, gpu) + copy_binary(directory, origin_tag, new_tag, version, package) if __name__ == "__main__": -- GitLab From 450cfe56193befdd183b3d900363ec10d71d1212 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 14 Feb 2019 16:27:12 -0800 Subject: [PATCH 165/351] Add legacy string flat hash map op kernels PiperOrigin-RevId: 234050903 --- tensorflow/core/kernels/lookup_tables/BUILD | 51 +++- ...ops.cc => fingerprint64_map_op_kernels.cc} | 4 +- .../lookup_tables/flat_hash_map_op_kernels.cc | 275 ++++++++++++++++++ .../lookup_tables/generic_table_op_kernels.cc | 227 +++++++++++++++ ...table_op_utils.h => op_kernel_templates.h} | 6 +- ...rface.h => resource_interface_templates.h} | 6 +- tensorflow/core/ops/lookup_table_ops.cc | 61 ++++ 7 files changed, 614 insertions(+), 16 deletions(-) rename tensorflow/core/kernels/lookup_tables/{fingerprint64_map_ops.cc => fingerprint64_map_op_kernels.cc} (96%) create mode 100644 tensorflow/core/kernels/lookup_tables/flat_hash_map_op_kernels.cc create mode 100644 tensorflow/core/kernels/lookup_tables/generic_table_op_kernels.cc rename tensorflow/core/kernels/lookup_tables/{table_op_utils.h => op_kernel_templates.h} (98%) rename tensorflow/core/kernels/lookup_tables/{lookup_table_interface.h => resource_interface_templates.h} (94%) create mode 100644 tensorflow/core/ops/lookup_table_ops.cc diff --git a/tensorflow/core/kernels/lookup_tables/BUILD b/tensorflow/core/kernels/lookup_tables/BUILD index 5cf628ef28..a25660e987 100644 --- a/tensorflow/core/kernels/lookup_tables/BUILD +++ b/tensorflow/core/kernels/lookup_tables/BUILD @@ -13,8 +13,8 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_kernel_library") cc_library( - name = "lookup_table_interface", - hdrs = ["lookup_table_interface.h"], + name = "resource_interface_templates", + hdrs = ["resource_interface_templates.h"], deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -23,8 +23,8 @@ cc_library( ) cc_library( - name = "table_op_utils", - hdrs = ["table_op_utils.h"], + name = "op_kernel_templates", + hdrs = ["op_kernel_templates.h"], deps = [ "//tensorflow/core:framework", "//tensorflow/core:framework_internal", 
@@ -40,15 +40,50 @@ cc_library( ) tf_kernel_library( - name = "fingerprint64_map_ops", + name = "fingerprint64_map_op_kernels", srcs = [ - "fingerprint64_map_ops.cc", + "fingerprint64_map_op_kernels.cc", ], deps = [ - ":lookup_table_interface", - ":table_op_utils", + ":op_kernel_templates", + ":resource_interface_templates", "//tensorflow/core:framework", "//tensorflow/core:lib", "@com_google_absl//absl/strings", ], ) + +tf_kernel_library( + name = "flat_hash_map_op_kernels", + srcs = [ + "flat_hash_map_op_kernels.cc", + ], + deps = [ + ":op_kernel_templates", + ":resource_interface_templates", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/kernels:tensor_flag_utils", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + ], +) + +tf_kernel_library( + name = "generic_table_op_kernels", + srcs = [ + "generic_table_op_kernels.cc", + ], + deps = [ + ":op_kernel_templates", + ":resource_interface_templates", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:string_view_variant_wrapper", + "@com_google_absl//absl/strings", + ], +) diff --git a/tensorflow/core/kernels/lookup_tables/fingerprint64_map_ops.cc b/tensorflow/core/kernels/lookup_tables/fingerprint64_map_op_kernels.cc similarity index 96% rename from tensorflow/core/kernels/lookup_tables/fingerprint64_map_ops.cc rename to tensorflow/core/kernels/lookup_tables/fingerprint64_map_op_kernels.cc index 65487d307e..36274bc6b6 100644 --- a/tensorflow/core/kernels/lookup_tables/fingerprint64_map_ops.cc +++ b/tensorflow/core/kernels/lookup_tables/fingerprint64_map_op_kernels.cc @@ -15,8 +15,8 @@ limitations under the License. 
#include "absl/strings/string_view.h" #include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/kernels/lookup_tables/lookup_table_interface.h" -#include "tensorflow/core/kernels/lookup_tables/table_op_utils.h" +#include "tensorflow/core/kernels/lookup_tables/op_kernel_templates.h" +#include "tensorflow/core/kernels/lookup_tables/resource_interface_templates.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/fingerprint.h" #include "tensorflow/core/platform/macros.h" diff --git a/tensorflow/core/kernels/lookup_tables/flat_hash_map_op_kernels.cc b/tensorflow/core/kernels/lookup_tables/flat_hash_map_op_kernels.cc new file mode 100644 index 0000000000..9c37ca87ce --- /dev/null +++ b/tensorflow/core/kernels/lookup_tables/flat_hash_map_op_kernels.cc @@ -0,0 +1,275 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include "absl/base/attributes.h" +#include "absl/container/flat_hash_map.h" +#include "absl/memory/memory.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/lookup_tables/op_kernel_templates.h" +#include "tensorflow/core/kernels/lookup_tables/resource_interface_templates.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/fingerprint.h" + +namespace tensorflow { +namespace tables { + +using errors::InvalidArgument; + +// absl::flat_hash_map backed table with inline +// fallback to x -> (Fingerprint64(x) % num_oov_buckets) + offset when looked +// up keys are not in the flat_hash_map. Inlining the fallback table turns out +// to be quite efficient in comparison to virtual dispatch for the fallback +// lookup. +template +class StaticStringFlatHashMap final + : public virtual LookupInterface, + public virtual LookupInterface, + public virtual LookupWithPrefetchInterface< + absl::Span, absl::Span>, + public virtual LookupWithPrefetchInterface, + absl::Span>, + public virtual KeyValueTableInitializerInterface< + absl::Span, absl::Span>, + public virtual KeyValueTableInitializerInterface< + absl::Span, absl::Span>, + public virtual SizeInterface { + public: + using value_type = ValueType; + + StaticStringFlatHashMap(bool enable_synchronization, int64 num_oov_buckets) + : num_oov_buckets_(num_oov_buckets) { + if (enable_synchronization) { + mutex_ = absl::make_unique(); + } + } + + Status Initialize(absl::Span keys, + absl::Span values) override { + if (ABSL_PREDICT_FALSE(keys.size() != values.size())) { + return errors::InvalidArgument( + "keys and values do not have the same number of elements (found ", + keys.size(), " vs ", values.size(), ")."); + } + + table_.reserve(table_.size() + keys.size()); + for (size_t i = 0; i < 
keys.size(); ++i) { + table_.insert_or_assign(string(keys[i]), values[i]); + } + return Status::OK(); + } + + Status Initialize(absl::Span keys, + absl::Span values) override { + if (ABSL_PREDICT_FALSE(keys.size() != values.size())) { + return errors::InvalidArgument( + "keys and values do not have the same number of elements (found ", + keys.size(), " vs ", values.size(), ")."); + } + + table_.reserve(table_.size() + keys.size()); + for (size_t i = 0; i < keys.size(); ++i) { + table_.insert_or_assign(keys[i], values[i]); + } + return Status::OK(); + } + + Status Lookup(const absl::string_view& key, ValueType* value) const override { + *value = LookupHelper(key); + return Status::OK(); + } + + Status Lookup(const string& key, ValueType* value) const override { + *value = LookupHelper(key); + return Status::OK(); + } + + // keys and values are guaranteed to have the same size by convention. + Status Lookup(absl::Span keys, + absl::Span values, + int64 prefetch_lookahead) const override { + const auto keys_size = keys.size(); + if (prefetch_lookahead <= 0 || prefetch_lookahead >= keys_size) { + for (size_t i = 0; i < keys_size; ++i) { + values[i] = LookupHelper(keys[i]); + } + } else { + for (size_t i = 0; i < keys_size; ++i) { + if (i + prefetch_lookahead < keys.size()) { + table_.prefetch(keys[i + prefetch_lookahead]); + } + values[i] = LookupHelper(keys[i]); + } + } + return Status::OK(); + } + + // keys and values are guaranteed to have the same size by convention. 
+ Status Lookup(absl::Span keys, absl::Span values, + int64 prefetch_lookahead) const override { + const auto keys_size = keys.size(); + if (prefetch_lookahead <= 0 || prefetch_lookahead >= keys_size) { + for (size_t i = 0; i < keys_size; ++i) { + values[i] = LookupHelper(keys[i]); + } + } else { + for (size_t i = 0; i < keys_size; ++i) { + if (i + prefetch_lookahead < keys.size()) { + table_.prefetch(keys[i + prefetch_lookahead]); + } + values[i] = LookupHelper(keys[i]); + } + } + return Status::OK(); + } + + uint64 Size() const override { return table_.size(); } + + mutex* GetMutex() const override { return mutex_.get(); } + + string DebugString() const override { return __PRETTY_FUNCTION__; } + + private: + template + ABSL_ATTRIBUTE_ALWAYS_INLINE ValueType + LookupHelper(const T& key_to_find) const { + auto it = table_.find(key_to_find); + if (it != table_.end()) { + return it->second; + } else { + return static_cast(Fingerprint64(key_to_find) % + num_oov_buckets_) + + StaticStringFlatHashMap::Size(); + } + } + + const int64 num_oov_buckets_; + std::unique_ptr mutex_; + // The underlying table. + absl::flat_hash_map table_; + TF_DISALLOW_COPY_AND_ASSIGN(StaticStringFlatHashMap); +}; + +// Used to allocate StaticStringFlatHashMap objects via the AllocateContainer +// method. +template +struct StaticStringFlatHashMapFactory { + struct Functor { + using resource_type = StaticStringFlatHashMap; + + template + static Status AllocateContainer(OpKernelContext* ctx, OpKernel* kernel, + StaticStringFlatHashMapBase** container) { + OpInputList table_int64_args; + TF_RETURN_IF_ERROR( + ctx->input_list("table_int64_args", &table_int64_args)); + const size_t variadic_arg_size = table_int64_args.size(); + if (ABSL_PREDICT_FALSE(variadic_arg_size != 2)) { + return errors::InvalidArgument( + "table_int64_args should have 2 elements (found ", + variadic_arg_size, + "). Set the first element to 1 to enable synchronized table use " + "and to 0 otherwise. 
The second element should be " + "num_oov_buckets."); + } + + const bool enable_synchronization = ctx->input(0).scalar()() != 0; + const int64 num_oov_buckets = ctx->input(1).scalar()(); + if (ABSL_PREDICT_FALSE(num_oov_buckets <= 0)) { + return errors::InvalidArgument( + "num_oov_buckets must be positive. Found: ", num_oov_buckets); + } + auto* non_virtual_container = + new StaticStringFlatHashMap(enable_synchronization, num_oov_buckets); + *container = non_virtual_container; + const Tensor& keys = ctx->input(table_int64_args.size()); + const Tensor& values = ctx->input(table_int64_args.size() + 1); + if (keys.NumElements() == 0) { + return Status::OK(); + } else if (keys.dtype() == DT_STRING) { + return Functor::Initialize( + keys.flat(), + values.flat(), + non_virtual_container); + } else if (keys.dtype() == DT_VARIANT) { + auto keys_flat = keys.flat(); + if (keys_flat(0).get() == nullptr) { + return errors::InvalidArgument( + "Variant keys tensor must have subtype absl::string_view."); + } + return Functor::Initialize( + keys.flat(), + values.flat(), + non_virtual_container); + } + return errors::InvalidArgument( + "keys tensor must have type DT_STRING or type DT_VARIANT with " + "subtype absl::string_view."); + } + + static Status Initialize( + const absl::Span keys, + const absl::Span + values, + StaticStringFlatHashMap* container) { + return container->Initialize(keys, values); + } + + static Status Initialize( + const absl::Span keys, + const absl::Span + values, + StaticStringFlatHashMap* container) { + std::vector keys_vec; + keys_vec.reserve(keys.size()); + for (size_t i = 0; i < keys.size(); ++i) { + keys_vec.push_back(*keys[i].get()); + } + return container->Initialize(keys_vec, values); + } + }; +}; + +template +using ResourceOp = ResourceConstructionOp< + typename StaticStringFlatHashMapFactory< + StaticStringFlatHashMap>::Functor, + // These are the aliases. 
+ LookupInterface, + LookupWithPrefetchInterface, + absl::Span>, + LookupInterface, + LookupWithPrefetchInterface, + absl::Span>, + SizeInterface>; + +#define REGISTER_STRING_KERNEL(table_value_dtype) \ + REGISTER_KERNEL_BUILDER( \ + Name("StaticStringFlatHashMap") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("heterogeneous_key_dtype") \ + .TypeConstraint("table_value_dtype"), \ + ResourceOp); + +REGISTER_STRING_KERNEL(int32); +REGISTER_STRING_KERNEL(int64); + +#undef REGISTER_STRING_KERNEL + +} // namespace tables +} // namespace tensorflow diff --git a/tensorflow/core/kernels/lookup_tables/generic_table_op_kernels.cc b/tensorflow/core/kernels/lookup_tables/generic_table_op_kernels.cc new file mode 100644 index 0000000000..9bb29afd19 --- /dev/null +++ b/tensorflow/core/kernels/lookup_tables/generic_table_op_kernels.cc @@ -0,0 +1,227 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include "absl/strings/string_view.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/variant.h" +#include "tensorflow/core/kernels/lookup_tables/op_kernel_templates.h" +#include "tensorflow/core/kernels/lookup_tables/resource_interface_templates.h" +#include "tensorflow/core/kernels/string_view_variant_wrapper.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { +namespace tables { + +template +struct TensorInsertFactory { + class Functor { + public: + // If KeyType is not 'valid' then use the value it wraps as the table key + // type. + using resource_type = InsertOrAssignInterface< + absl::Span, + typename absl::conditional_t< + IsValidDataType::value, absl::Span, + absl::Span>>; + + static Status TensorInsert(const Tensor& keys, const Tensor& values, + resource_type* table) { + if (keys.NumElements() != values.NumElements()) { + return errors::InvalidArgument( + "OpKernel tried to map keys vector of size ", keys.NumElements(), + " to values vector of size ", values.NumElements()); + } + return TensorInsertHelper(keys, values, table); + } + + private: + // keys and *values arguments to TensorInsert must have the same number of + // elements. This is guaranteed above. + + // 'Simple' types below are types which are natively supported in TF. + // Non-variant KeyType which is the same as Container::key_type. + // No need to static_cast. + template + static absl::enable_if_t::value, Status> + TensorInsertHelper(const Tensor& keys, const Tensor& values, + resource_type* table) { + return table->InsertOrAssign(keys.flat(), + values.flat()); + } + + // Variant KeyType; the wrapped type is convertible to + // Container::key_type. 
+ template + static absl::enable_if_t::value, Status> + TensorInsertHelper(const Tensor& keys, const Tensor& values, + resource_type* table) { + const auto keys_flat = keys.flat(); + std::vector keys_vec; + keys_vec.reserve(keys_flat.size()); + for (size_t i = 0; i < keys_flat.size(); ++i) { + keys_vec.emplace_back( + *keys_flat(i).get()); + } + return table->InsertOrAssign(keys_vec, values.flat()); + } + }; +}; + +template +using InsertOp = LookupTableInsertOp< + typename TensorInsertFactory::Functor>; + +template +struct TensorLookupFactory { + class Functor { + public: + // If KeyType is not 'valid' then use the value it wraps as the table key + // type. + using resource_type = LookupWithPrefetchInterface< + absl::Span, + typename absl::conditional_t< + IsValidDataType::value, absl::Span, + absl::Span>>; + + static Status TensorLookup(const resource_type& table, const Tensor& keys, + const int64 prefetch_lookahead, + const int64 num_keys_per_thread, + thread::ThreadPool* threadpool, Tensor* values) { + if (keys.NumElements() != values->NumElements()) { + return errors::InvalidArgument( + "OpKernel tried to map keys vector of size ", keys.NumElements(), + " to values vector of size ", values->NumElements()); + } + return TensorLookupHelper(table, keys, prefetch_lookahead, + num_keys_per_thread, threadpool, values); + } + + private: + // keys and *values arguments to TensorLookup must have the same number of + // elements. This is guaranteed above. + + // 'Simple' types below are types which are natively supported in TF. 
+ template + static absl::enable_if_t::value, Status> + TensorLookupHelper(const resource_type& table, const Tensor& keys, + const int64 prefetch_lookahead, + const int64 num_keys_per_thread, + thread::ThreadPool* threadpool, Tensor* values) { + const auto keys_flat = keys.flat(); + auto key_span = absl::MakeSpan(keys_flat); + auto value_span = absl::MakeSpan(values->flat().data(), + values->NumElements()); + return MultithreadedTensorLookup(table, prefetch_lookahead, + num_keys_per_thread, key_span, + value_span, threadpool); + } + + // Non-simple KeyType. We'll try an implicit conversion to + // Container::key_type. + template + static absl::enable_if_t::value, Status> + TensorLookupHelper(const resource_type& table, const Tensor& keys, + const int64 prefetch_lookahead, + const int64 num_keys_per_thread, + thread::ThreadPool* threadpool, Tensor* values) { + const auto keys_flat = keys.flat(); + std::vector keys_vec; + const auto keys_size = keys_flat.size(); + keys_vec.reserve(keys_size); + for (size_t i = 0; i < keys_size; ++i) { + keys_vec.emplace_back(*keys_flat(i).get()->get()); + } + absl::Span key_span(keys_vec); + auto value_span = absl::MakeSpan(values->flat().data(), + values->NumElements()); + return MultithreadedTensorLookup(table, prefetch_lookahead, + num_keys_per_thread, key_span, + value_span, threadpool); + } + + // Wrapper around table.BatchLookup which permits sharding across cores. + template + static Status MultithreadedTensorLookup(const resource_type& table, + int64 prefetch_lookahead, + int64 num_keys_per_thread, K keys, + V values, + thread::ThreadPool* threadpool) { + mutex temp_mutex; // Protect status. 
+ Status status; + auto lookup_keys = [&](int64 begin, int64 end) { + auto temp_status = table.Lookup(keys.subspan(begin, end - begin), + values.subspan(begin, end - begin), + prefetch_lookahead); + if (ABSL_PREDICT_FALSE(!temp_status.ok())) { + mutex_lock lock(temp_mutex); + status.Update(temp_status); + } + }; + threadpool->TransformRangeConcurrently( + num_keys_per_thread /* block_size */, keys.size(), lookup_keys); + return status; + } + }; +}; + +template +using LookupOp = LookupTableFindOp< + typename TensorLookupFactory::Functor>; + +struct TableSizeFunctor { + using resource_type = SizeInterface; + + static Status Size(const SizeInterface& table, uint64* size) { + *size = table.Size(); + return Status::OK(); + } +}; + +#define REGISTER_STRING_KERNEL(table_value_dtype) \ + REGISTER_KERNEL_BUILDER( \ + Name("LookupTableInsertOrAssignOp") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("insert_key_tensor_dtype") \ + .TypeConstraint("table_value_dtype"), \ + InsertOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LookupTableInsertOrAssignOp") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("insert_key_tensor_dtype") \ + .TypeConstraint("table_value_dtype"), \ + InsertOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LookupTableFindOp") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("lookup_key_tensor_dtype") \ + .TypeConstraint("table_value_dtype"), \ + LookupOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LookupTableFindOp") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("lookup_key_tensor_dtype") \ + .TypeConstraint("table_value_dtype"), \ + LookupOp); \ + REGISTER_KERNEL_BUILDER(Name("ContainerSizeOp").Device(DEVICE_CPU), \ + ContainerSizeOp); + +REGISTER_STRING_KERNEL(int32); +REGISTER_STRING_KERNEL(int64); + +#undef REGISTER_STRING_KERNEL + +} // namespace tables +} // namespace tensorflow diff --git a/tensorflow/core/kernels/lookup_tables/table_op_utils.h b/tensorflow/core/kernels/lookup_tables/op_kernel_templates.h similarity index 98% rename from 
tensorflow/core/kernels/lookup_tables/table_op_utils.h rename to tensorflow/core/kernels/lookup_tables/op_kernel_templates.h index b4b2742266..d830062575 100644 --- a/tensorflow/core/kernels/lookup_tables/table_op_utils.h +++ b/tensorflow/core/kernels/lookup_tables/op_kernel_templates.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_OP_UTILS_H_ -#define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_OP_UTILS_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_OP_KERNEL_TEMPLATES_H_ +#define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_OP_KERNEL_TEMPLATES_H_ #include #include @@ -445,4 +445,4 @@ class ContainerSizeOp : public OpKernel { } // namespace tables } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_TABLE_OP_UTILS_H_ +#endif // TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_OP_KERNEL_TEMPLATES_H_ diff --git a/tensorflow/core/kernels/lookup_tables/lookup_table_interface.h b/tensorflow/core/kernels/lookup_tables/resource_interface_templates.h similarity index 94% rename from tensorflow/core/kernels/lookup_tables/lookup_table_interface.h rename to tensorflow/core/kernels/lookup_tables/resource_interface_templates.h index de6705d694..7331fb400a 100644 --- a/tensorflow/core/kernels/lookup_tables/lookup_table_interface.h +++ b/tensorflow/core/kernels/lookup_tables/resource_interface_templates.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_LOOKUP_TABLE_INTERFACE_H_ -#define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_LOOKUP_TABLE_INTERFACE_H_ +#ifndef TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_RESOURCE_INTERFACE_TEMPLATES_H_ +#define TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_RESOURCE_INTERFACE_TEMPLATES_H_ #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/lib/core/status.h" @@ -96,4 +96,4 @@ class KeyValueTableInitializerInterface : public virtual SynchronizedInterface { } // namespace tables } // namespace tensorflow -#endif // TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_LOOKUP_TABLE_INTERFACE_H_ +#endif // TENSORFLOW_CORE_KERNELS_LOOKUP_TABLES_RESOURCE_INTERFACE_TEMPLATES_H_ diff --git a/tensorflow/core/ops/lookup_table_ops.cc b/tensorflow/core/ops/lookup_table_ops.cc new file mode 100644 index 0000000000..3ce08f6f2f --- /dev/null +++ b/tensorflow/core/ops/lookup_table_ops.cc @@ -0,0 +1,61 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +using shape_inference::InferenceContext; + +REGISTER_OP("LookupTableInsertOrAssignOp") + .Input("table_int64_args: num_int64_table_args * int64") + .Input("table_handle: resource") + .Input("keys: insert_key_tensor_dtype") + .Input("values: table_value_dtype") + .Attr("insert_key_tensor_dtype: type") + .Attr("table_value_dtype: type") + .Attr("num_int64_table_args: int >= 0") + .SetShapeFn([](InferenceContext* c) { + // Note that, by design, shape checks are implementation dependent so they + // must be deferred until runtime. + return Status::OK(); + }); + +REGISTER_OP("LookupTableFindOp") + .Input("table_int64_args: num_int64_table_args * int64") + .Input("table_handle: resource") + .Input("keys: lookup_key_tensor_dtype") + .Input("num_threads: int64") + .Output("values: table_value_dtype") + .Attr("table_value_dtype: type") + .Attr("lookup_key_tensor_dtype: type") + .Attr("num_int64_table_args: int >= 0") + .SetShapeFn([](InferenceContext* c) { + // The output shape cannot be inferred here because the key size + // cannot be inferred from the key tensor in general. + c->set_output(0, c->UnknownShape()); + return Status::OK(); + }); + +REGISTER_OP("ContainerSizeOp") + .Input("container_handle: resource") + .Output("size: int64") + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->Scalar()); + return Status::OK(); + }); + +} // namespace tensorflow -- GitLab From 03ebaa77ac69aed092dd899c1a68331b86d0d4af Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 16:45:17 -0800 Subject: [PATCH 166/351] Fixed typo. 
PiperOrigin-RevId: 234053835 --- tensorflow/examples/udacity/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md index c8ab24871c..b3bd73a08b 100644 --- a/tensorflow/examples/udacity/README.md +++ b/tensorflow/examples/udacity/README.md @@ -121,7 +121,7 @@ History * 0.1.0: Initial release. * 0.2.0: Many fixes, including lower memory footprint and support for Python 3. * 0.3.0: Use 0.7.1 release. -* 0.4.0: Move notMMNIST data for Google Cloud. +* 0.4.0: Move notMNIST data for Google Cloud. * 0.5.0: Actually use 0.7.1 release. * 0.6.0: Update to TF 0.10.0, add libjpeg (for Pillow). * 1.0.0: Update to TF 1.0.0 release. -- GitLab From 93e707396e890808b36dd8ff97cf9006eac597b0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 16:45:32 -0800 Subject: [PATCH 167/351] Updates the configure Python script to support building Bazel rules on Apple platforms. PiperOrigin-RevId: 234053872 --- configure.py | 33 ++ .../examples/ios/benchmark/ios_image_load.h | 2 +- .../ios/camera/CameraExampleViewController.h | 4 +- .../examples/ios/camera/ios_image_load.h | 2 +- .../examples/ios/camera/tensorflow_utils.h | 4 +- .../examples/ios/simple/ios_image_load.h | 2 +- .../ios/camera/CameraExampleViewController.mm | 18 +- tensorflow/lite/experimental/objc/BUILD.apple | 104 +++++ tensorflow/lite/experimental/objc/README.md | 52 +++ .../Configs/TensorFlowLiteObjc.tulsigen | 60 +++ .../project.tulsiconf | 17 + .../experimental/objc/apis/TFLInterpreter.h | 179 ++++++++ .../objc/apis/TFLInterpreterOptions.h | 37 ++ .../objc/apis/TFLQuantizationParameters.h | 36 ++ .../lite/experimental/objc/apis/TFLTensor.h | 111 +++++ .../experimental/objc/sources/TFLErrorUtil.h | 40 ++ .../experimental/objc/sources/TFLErrorUtil.m | 38 ++ .../objc/sources/TFLInterpreter+Internal.h | 63 +++ .../objc/sources/TFLInterpreter.mm | 407 ++++++++++++++++++ 
.../objc/sources/TFLInterpreterOptions.m | 30 ++ .../TFLQuantizationParameters+Internal.h | 33 ++ .../objc/sources/TFLQuantizationParameters.m | 36 ++ .../objc/sources/TFLTensor+Internal.h | 74 ++++ .../experimental/objc/sources/TFLTensor.m | 103 +++++ .../objc/tests/TFLInterpreterOptionsTests.m | 49 +++ .../objc/tests/TFLInterpreterTests.m | 358 +++++++++++++++ .../tests/TFLQuantizationParametersTests.m | 48 +++ .../lite/experimental/swift/BUILD.apple | 101 +++++ tensorflow/lite/experimental/swift/LICENSE | 202 +++++++++ tensorflow/lite/experimental/swift/README.md | 76 ++++ .../swift/Sources/Interpreter.swift | 265 ++++++++++++ .../swift/Sources/InterpreterError.swift | 99 +++++ .../swift/Sources/InterpreterOptions.swift | 29 ++ .../experimental/swift/Sources/Model.swift | 40 ++ .../Sources/QuantizationParameters.swift | 38 ++ .../experimental/swift/Sources/Tensor.swift | 138 ++++++ .../Configs/TensorFlowLite.tulsigen | 57 +++ .../project.tulsiconf | 14 + .../project.pbxproj | 345 +++++++++++++++ .../TensorFlowLiteApp/AppDelegate.swift | 24 ++ .../Array+TensorFlowLite.swift | 22 + .../AppIcon.appiconset/Contents.json | 98 +++++ .../Assets.xcassets/Contents.json | 6 + .../Base.lproj/LaunchScreen.storyboard | 44 ++ .../Base.lproj/Main.storyboard | 95 ++++ .../Data+TensorFlowLite.swift | 13 + .../TensorFlowLiteApp/Info.plist | 46 ++ .../TensorFlowLiteApp/ViewController.swift | 299 +++++++++++++ .../swift/Tests/InterpreterOptionsTests.swift | 54 +++ .../swift/Tests/InterpreterTests.swift | 315 ++++++++++++++ .../experimental/swift/Tests/ModelTests.swift | 59 +++ .../Tests/QuantizationParametersTests.swift | 43 ++ .../swift/Tests/TensorTests.swift | 83 ++++ .../tools/pip_package/pip_smoke_test.py | 1 + 54 files changed, 4530 insertions(+), 16 deletions(-) create mode 100644 tensorflow/lite/experimental/objc/BUILD.apple create mode 100644 tensorflow/lite/experimental/objc/README.md create mode 100644 
tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen create mode 100644 tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf create mode 100644 tensorflow/lite/experimental/objc/apis/TFLInterpreter.h create mode 100644 tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h create mode 100644 tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h create mode 100644 tensorflow/lite/experimental/objc/apis/TFLTensor.h create mode 100644 tensorflow/lite/experimental/objc/sources/TFLErrorUtil.h create mode 100644 tensorflow/lite/experimental/objc/sources/TFLErrorUtil.m create mode 100644 tensorflow/lite/experimental/objc/sources/TFLInterpreter+Internal.h create mode 100644 tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm create mode 100644 tensorflow/lite/experimental/objc/sources/TFLInterpreterOptions.m create mode 100644 tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters+Internal.h create mode 100644 tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters.m create mode 100644 tensorflow/lite/experimental/objc/sources/TFLTensor+Internal.h create mode 100644 tensorflow/lite/experimental/objc/sources/TFLTensor.m create mode 100644 tensorflow/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m create mode 100644 tensorflow/lite/experimental/objc/tests/TFLInterpreterTests.m create mode 100644 tensorflow/lite/experimental/objc/tests/TFLQuantizationParametersTests.m create mode 100644 tensorflow/lite/experimental/swift/BUILD.apple create mode 100644 tensorflow/lite/experimental/swift/LICENSE create mode 100644 tensorflow/lite/experimental/swift/README.md create mode 100644 tensorflow/lite/experimental/swift/Sources/Interpreter.swift create mode 100644 tensorflow/lite/experimental/swift/Sources/InterpreterError.swift create mode 100644 tensorflow/lite/experimental/swift/Sources/InterpreterOptions.swift create mode 100644 
tensorflow/lite/experimental/swift/Sources/Model.swift create mode 100644 tensorflow/lite/experimental/swift/Sources/QuantizationParameters.swift create mode 100644 tensorflow/lite/experimental/swift/Sources/Tensor.swift create mode 100644 tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/Configs/TensorFlowLite.tulsigen create mode 100644 tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/project.tulsiconf create mode 100644 tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp.xcodeproj/project.pbxproj create mode 100644 tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/AppDelegate.swift create mode 100644 tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Array+TensorFlowLite.swift create mode 100644 tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/AppIcon.appiconset/Contents.json create mode 100644 tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/Contents.json create mode 100644 tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/LaunchScreen.storyboard create mode 100644 tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/Main.storyboard create mode 100644 tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Data+TensorFlowLite.swift create mode 100644 tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Info.plist create mode 100644 tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/ViewController.swift create mode 100644 tensorflow/lite/experimental/swift/Tests/InterpreterOptionsTests.swift create mode 100644 tensorflow/lite/experimental/swift/Tests/InterpreterTests.swift create mode 100644 tensorflow/lite/experimental/swift/Tests/ModelTests.swift create mode 100644 
tensorflow/lite/experimental/swift/Tests/QuantizationParametersTests.swift create mode 100644 tensorflow/lite/experimental/swift/Tests/TensorTests.swift diff --git a/configure.py b/configure.py index 14fca1f732..61fa9feade 100644 --- a/configure.py +++ b/configure.py @@ -55,6 +55,12 @@ NCCL_LIB_PATHS = [ 'lib64/', 'lib/powerpc64le-linux-gnu/', 'lib/x86_64-linux-gnu/', '' ] +# List of files to be configured for using Bazel on Apple platforms. +APPLE_BAZEL_FILES = [ + 'tensorflow/lite/experimental/objc/BUILD', + 'tensorflow/lite/experimental/swift/BUILD' +] + if platform.machine() == 'ppc64le': _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/powerpc64le-linux-gnu/' else: @@ -1534,6 +1540,23 @@ def config_info_line(name, help_text): print('\t--config=%-12s\t# %s' % (name, help_text)) +def configure_apple_bazel_rules(): + """Configures Bazel rules for building on Apple platforms. + + Enables analyzing and building Apple Bazel rules on Apple platforms. This + function will only be executed if `is_macos()` is true. + """ + if not is_macos(): + return + for filepath in APPLE_BAZEL_FILES: + print( + 'Configuring %s file to analyze and build Bazel rules on Apple platforms.' + % filepath) + existing_filepath = os.path.join(_TF_WORKSPACE_ROOT, filepath + '.apple') + renamed_filepath = os.path.join(_TF_WORKSPACE_ROOT, filepath) + os.rename(existing_filepath, renamed_filepath) + + def main(): global _TF_WORKSPACE_ROOT global _TF_BAZELRC @@ -1574,6 +1597,8 @@ def main(): if is_macos(): environ_cp['TF_NEED_TENSORRT'] = '0' + else: + environ_cp['TF_CONFIGURE_APPLE_BAZEL_RULES'] = '0' # The numpy package on ppc64le uses OpenBLAS which has multi-threading # issues that lead to incorrect answers. 
Set OMP_NUM_THREADS=1 at @@ -1676,6 +1701,14 @@ def main(): create_android_ndk_rule(environ_cp) create_android_sdk_rule(environ_cp) + if get_var( + environ_cp, 'TF_CONFIGURE_APPLE_BAZEL_RULES', + 'Configure Bazel rules for Apple platforms', False, + ('Would you like to configure Bazel rules for building on Apple platforms?' + ), 'Configuring Bazel rules for Apple platforms.', + 'Not configuring Bazel rules for Apple platforms.'): + configure_apple_bazel_rules() + print('Preconfigured Bazel build configs. You can use any of the below by ' 'adding "--config=<>" to your build command. See .bazelrc for more ' 'details.') diff --git a/tensorflow/examples/ios/benchmark/ios_image_load.h b/tensorflow/examples/ios/benchmark/ios_image_load.h index 3f94984692..22ee785dc3 100644 --- a/tensorflow/examples/ios/benchmark/ios_image_load.h +++ b/tensorflow/examples/ios/benchmark/ios_image_load.h @@ -17,7 +17,7 @@ #include -#include "tensorflow/core/framework/types.h" +#include "third_party/tensorflow/core/framework/types.h" std::vector LoadImageFromFile(const char* file_name, int* out_width, diff --git a/tensorflow/examples/ios/camera/CameraExampleViewController.h b/tensorflow/examples/ios/camera/CameraExampleViewController.h index 0aefbc6eed..277b6e272d 100644 --- a/tensorflow/examples/ios/camera/CameraExampleViewController.h +++ b/tensorflow/examples/ios/camera/CameraExampleViewController.h @@ -16,8 +16,8 @@ #import #include -#include "tensorflow/core/public/session.h" -#include "tensorflow/core/util/memmapped_file_system.h" +#include "third_party/tensorflow/core/public/session.h" +#include "third_party/tensorflow/core/util/memmapped_file_system.h" @interface CameraExampleViewController : UIViewController -#include "tensorflow/core/framework/types.h" +#include "third_party/tensorflow/core/framework/types.h" std::vector LoadImageFromFile(const char* file_name, int* out_width, diff --git a/tensorflow/examples/ios/camera/tensorflow_utils.h 
b/tensorflow/examples/ios/camera/tensorflow_utils.h index 78bdb82aae..33e95b185c 100644 --- a/tensorflow/examples/ios/camera/tensorflow_utils.h +++ b/tensorflow/examples/ios/camera/tensorflow_utils.h @@ -18,8 +18,8 @@ #include #include -#include "tensorflow/core/public/session.h" -#include "tensorflow/core/util/memmapped_file_system.h" +#include "third_party/tensorflow/core/public/session.h" +#include "third_party/tensorflow/core/util/memmapped_file_system.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" // Reads a serialized GraphDef protobuf file from the bundle, typically diff --git a/tensorflow/examples/ios/simple/ios_image_load.h b/tensorflow/examples/ios/simple/ios_image_load.h index 0e0b771118..2d2ee78e99 100644 --- a/tensorflow/examples/ios/simple/ios_image_load.h +++ b/tensorflow/examples/ios/simple/ios_image_load.h @@ -17,7 +17,7 @@ #include -#include "tensorflow/core/framework/types.h" +#include "third_party/tensorflow/core/framework/types.h" std::vector LoadImageFromFile(const char* file_name, int* out_width, diff --git a/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm b/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm index 4f6fcaa96c..c891cd0da3 100644 --- a/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm +++ b/tensorflow/lite/examples/ios/camera/CameraExampleViewController.mm @@ -24,17 +24,17 @@ #include #if TFLITE_USE_CONTRIB_LITE -#include "tensorflow/contrib/lite/kernels/register.h" -#include "tensorflow/contrib/lite/model.h" -#include "tensorflow/contrib/lite/op_resolver.h" -#include "tensorflow/contrib/lite/string_util.h" +#include "third_party/tensorflow/contrib/lite/kernels/register.h" +#include "third_party/tensorflow/contrib/lite/model.h" +#include "third_party/tensorflow/contrib/lite/op_resolver.h" +#include "third_party/tensorflow/contrib/lite/string_util.h" #else -#include "tensorflow/lite/kernels/register.h" -#include "tensorflow/lite/model.h" -#include 
"tensorflow/lite/op_resolver.h" -#include "tensorflow/lite/string_util.h" +#include "third_party/tensorflow/lite/kernels/register.h" +#include "third_party/tensorflow/lite/model.h" +#include "third_party/tensorflow/lite/op_resolver.h" +#include "third_party/tensorflow/lite/string_util.h" #if TFLITE_USE_GPU_DELEGATE -#include "tensorflow/lite/delegates/gpu/metal_delegate.h" +#include "third_party/tensorflow/lite/delegates/gpu/metal_delegate.h" #endif #endif diff --git a/tensorflow/lite/experimental/objc/BUILD.apple b/tensorflow/lite/experimental/objc/BUILD.apple new file mode 100644 index 0000000000..69d6985fb5 --- /dev/null +++ b/tensorflow/lite/experimental/objc/BUILD.apple @@ -0,0 +1,104 @@ +# TensorFlow Lite Objective-C API. + +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) # Apache 2.0 + +load("@build_bazel_rules_apple//apple:ios.bzl", "ios_unit_test") + +SOURCES = glob([ + "sources/*.h", + "sources/*.m", + "sources/*.mm", +]) + +API_HEADERS = glob([ + "apis/*.h", +]) + +MINIMUM_OS_VERSION = "9.0" + +# Compiler flags for building regular non-test libraries. +RELEASE_COPTS = [ + # Enables language-specific warnings for Objective-C, Objective-C++, C, and C++. + "-Wall", + # Warns if functions, variables, and types marked with the deprecated attribute are being used. + "-Wdeprecated-declarations", + # Warns for errors in documentation. + "-Wdocumentation", + # Turns all warnings into errors. + "-Werror", + # Enables extra warning flags that are not enabled by -Wall. + "-Wextra", + # Warns if a global function is defined without a previous prototype declaration. + "-Wmissing-prototypes", + # From -Wextra. Disables warning when signed value is converted to unsigned value during comparison. + "-Wno-sign-compare", + # From -Wextra. Disables warning for unused parameters, which are common in delegate methods and block callbacks. 
+ "-Wno-unused-parameter", + # Warns if a global or local variable or type declaration shadows another variable, parameter, type, class member, or instance variable. + "-Wshadow", + # Warns if a function is declared or defined without specifying the argument types. For a block with no args, use (void) instead of (). + "-Wstrict-prototypes", + # Warns if an @selector() expression is encountered with a method name that hasn't been defined yet. + "-Wundeclared-selector", + # Turn off warnings for headers not part of TensorFlow Lite Objective-C API. + "--system-header-prefix=tensorflow/lite/experimental/c/", +] + +# Compiler flags for building test libraries. +TEST_COPTS = RELEASE_COPTS + [ + # From -Wall. Disables warning when passing nil to a callee that requires a non-null argument. + "-Wno-nonnull", + # Disables warning when a global or local variable or type declaration shadows another. + "-Wno-shadow", +] + +objc_library( + name = "TensorFlowLite", + srcs = SOURCES, + hdrs = API_HEADERS, + copts = RELEASE_COPTS, + tags = ["manual"], + deps = [ + "//tensorflow/lite/experimental/c:c_api", + ], + alwayslink = 1, +) + +ios_unit_test( + name = "TensorFlowLiteTests", + size = "small", + minimum_os_version = MINIMUM_OS_VERSION, + tags = [ + "manual", + # These sanitizer tests are not supported by iOS build toolchain (b/74292221). + # Disabled these for iOS test targets. 
+ "noasan", + "notsan", + "nomsan", + ], + deps = [":TensorFlowLiteTestsLib"], +) + +objc_library( + name = "TensorFlowLiteTestsLib", + testonly = 1, + srcs = glob([ + "tests/*.m", + ]), + hdrs = glob([ + "apis/*.h", + "sources/*.h", + "tests/*.h", + ]), + copts = TEST_COPTS, + resources = [ + "//tensorflow/lite:testdata/add.bin", + "//tensorflow/lite:testdata/add_quantized.bin", + ], + tags = ["manual"], + deps = [ + ":TensorFlowLite", + ], +) diff --git a/tensorflow/lite/experimental/objc/README.md b/tensorflow/lite/experimental/objc/README.md new file mode 100644 index 0000000000..2940e05240 --- /dev/null +++ b/tensorflow/lite/experimental/objc/README.md @@ -0,0 +1,52 @@ +# TensorFlow Lite Objective-C Library + +[TensorFlow Lite](https://www.tensorflow.org/lite/) is TensorFlow's lightweight +solution for Objective-C developers. It enables low-latency inference of +on-device machine learning models with a small binary size and fast performance +supporting hardware acceleration. + +## Getting Started + +### Bazel + +In your `BUILD` file, add the `TensorFlowLite` dependency: + +```python +objc_library( + deps = [ + "//tensorflow/lite/experimental/objc:TensorFlowLite", + ], +) +``` + +If you would like to build the Objective-C TensorFlow Lite library using Bazel on Apple +platforms, clone or download the [TensorFlow GitHub repo](https://github.com/tensorflow/tensorflow), +then navigate to the root `tensorflow` directory and execute the `configure.py` script: + +```shell +python configure.py +``` + +Follow the prompts and when asked to configure the Bazel rules for Apple +platforms, enter `y`. 
+ +Build the `TensorFlowLite` Objective-C library target: + +```shell +bazel build tensorflow/lite/experimental/objc:TensorFlowLite +``` + +Build the `TensorFlowLiteTests` target: + +```shell +bazel test tensorflow/lite/experimental/objc:TensorFlowLiteTests +``` + +### Tulsi + +Open the `TensorFlowLiteObjc.tulsiproj` using the Tulsi application on Mac or by +running the following command in Terminal from the root source directory: + +```shell +generate_xcodeproj.sh --genconfig tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj:TensorFlowLiteObjC --outputfolder ~/path/to/xcodeproj +``` diff --git a/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen b/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen new file mode 100644 index 0000000000..091ef4e2ea --- /dev/null +++ b/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/Configs/TensorFlowLiteObjc.tulsigen @@ -0,0 +1,60 @@ +{ + "sourceFilters" : [ + "tensorflow/lite", + "tensorflow/lite/experimental/c", + "tensorflow/lite/experimental/objc", + "tensorflow/lite/experimental/objc/apis", + "tensorflow/lite/experimental/objc/sources", + "tensorflow/lite/experimental/objc/tests", + "tensorflow/lite/kernels", + "tensorflow/lite/kernels/internal", + "tensorflow/lite/nnapi", + "tensorflow/lite/schema", + ], + "buildTargets" : [ + "//tensorflow/lite/experimental/objc:TensorFlowLite", + "//tensorflow/lite/experimental/objc:TensorFlowLiteTests", + ], + "projectName" : "TensorFlowLiteObjC", + "optionSet" : { + "LaunchActionPreActionScript" : { + "p" : "$(inherited)" + }, + "BazelBuildStartupOptionsRelease" : { + "p" : "$(inherited)" + }, + "BazelBuildOptionsRelease" : { + "p" : "$(inherited)" + }, + "BazelBuildOptionsDebug" : { + "p" : "$(inherited)" + }, + "EnvironmentVariables" : { + "p" : "$(inherited)" + }, + "BuildActionPreActionScript" : { + "p" : "$(inherited)" + }, + "CommandlineArguments" : { + "p" : 
"$(inherited)" + }, + "TestActionPreActionScript" : { + "p" : "$(inherited)" + }, + "BazelBuildStartupOptionsDebug" : { + "p" : "$(inherited)" + }, + "BuildActionPostActionScript" : { + "p" : "$(inherited)" + }, + "TestActionPostActionScript" : { + "p" : "$(inherited)" + }, + "LaunchActionPostActionScript" : { + "p" : "$(inherited)" + } + }, + "additionalFilePaths" : [ + "tensorflow/lite/experimental/objc/BUILD", + ] +} diff --git a/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf b/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf new file mode 100644 index 0000000000..0b6fedff3f --- /dev/null +++ b/tensorflow/lite/experimental/objc/TensorFlowLiteObjc.tulsiproj/project.tulsiconf @@ -0,0 +1,17 @@ +{ + "configDefaults" : { + "optionSet" : { + "BazelBuildOptionsDebug" : { + + }, + "BazelBuildOptionsRelease" : { + + }, + } + }, + "projectName" : "TensorFlowLiteObjC", + "packages" : [ + "tensorflow/lite/experimental/objc" + ], + "workspaceRoot" : "../../../../.." +} diff --git a/tensorflow/lite/experimental/objc/apis/TFLInterpreter.h b/tensorflow/lite/experimental/objc/apis/TFLInterpreter.h new file mode 100644 index 0000000000..3c06a4bc82 --- /dev/null +++ b/tensorflow/lite/experimental/objc/apis/TFLInterpreter.h @@ -0,0 +1,179 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import + +@class TFLInterpreterOptions; +@class TFLTensor; + +NS_ASSUME_NONNULL_BEGIN + +/** + * @enum TFLInterpreterErrorCode + * This enum specifies various error codes related to `TFLInterpreter`. + */ +typedef NS_ENUM(NSUInteger, TFLInterpreterErrorCode) { + /** Provided tensor index is invalid. */ + TFLInterpreterErrorCodeInvalidTensorIndex, + + /** Input data has invalid byte size. */ + TFLInterpreterErrorCodeInvalidInputByteSize, + + /** Provided shape is invalid. It must be a non-empty array of positive unsigned integers. */ + TFLInterpreterErrorCodeInvalidShape, + + /** Provided model cannot be loaded. */ + TFLInterpreterErrorCodeFailedToLoadModel, + + /** Failed to create `TFLInterpreter`. */ + TFLInterpreterErrorCodeFailedToCreateInterpreter, + + /** Failed to invoke `TFLInterpreter`. */ + TFLInterpreterErrorCodeFailedToInvoke, + + /** Failed to retrieve a tensor. */ + TFLInterpreterErrorCodeFailedToGetTensor, + + /** Invalid tensor. */ + TFLInterpreterErrorCodeInvalidTensor, + + /** Failed to resize an input tensor. */ + TFLInterpreterErrorCodeFailedToResizeInputTensor, + + /** Failed to copy data into an input tensor. */ + TFLInterpreterErrorCodeFailedToCopyDataToInputTensor, + + /** Copying data into an output tensor not allowed. */ + TFLInterpreterErrorCodeCopyDataToOutputTensorNotAllowed, + + /** Failed to get data from a tensor. */ + TFLInterpreterErrorCodeFailedToGetDataFromTensor, + + /** Failed to allocate memory for tensors. */ + TFLInterpreterErrorCodeFailedToAllocateTensors, + + /** Operation not allowed without allocating memory for tensors first. */ + TFLInterpreterErrorCodeAllocateTensorsRequired, + + /** Operation not allowed without invoking the interpreter first. */ + TFLInterpreterErrorCodeInvokeInterpreterRequired, +}; + +/** + * A TensorFlow Lite model interpreter. + */ +@interface TFLInterpreter : NSObject + +/** The total number of input tensors. 0 if the interpreter creation failed.
*/ +@property(nonatomic, readonly) NSUInteger inputTensorCount; + +/** The total number of output tensors. 0 if the interpreter creation failed. */ +@property(nonatomic, readonly) NSUInteger outputTensorCount; + +/** Unavailable. */ +- (instancetype)init NS_UNAVAILABLE; + +/** + * Initializes a new TensorFlow Lite interpreter instance with the given model file path and the + * default interpreter options. + * + * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device. + * @param error An optional error parameter populated when there is an error in initializing the + * interpreter. + * + * @return A new instance of `TFLInterpreter` with the given model and the default interpreter + * options. `nil` if there is an error in initializing the interpreter. + */ +- (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error; + +/** + * Initializes a new TensorFlow Lite interpreter instance with the given model file path and + * options. + * + * @param modelPath An absolute path to a TensorFlow Lite model file stored locally on the device. + * @param options Options to use for configuring the TensorFlow Lite interpreter. + * @param error An optional error parameter populated when there is an error in initializing the + * interpreter. + * + * @return A new instance of `TFLInterpreter` with the given model and options. `nil` if there is an + * error in initializing the interpreter. + */ +- (nullable instancetype)initWithModelPath:(NSString *)modelPath + options:(TFLInterpreterOptions *)options + error:(NSError **)error NS_DESIGNATED_INITIALIZER; + +/** + * Invokes the interpreter to run inference. + * + * @param error An optional error parameter populated when there is an error in invoking the + * interpreter. + * + * @return Whether the invocation is successful. Returns NO if an error occurred. + */ +- (BOOL)invokeWithError:(NSError **)error; + +/** + * Returns the input tensor at the given index. 
+ * + * @param index The index of an input tensor. + * @param error An optional error parameter populated when there is an error in looking up the input + * tensor. + * + * @return The input tensor at the given index. `nil` if there is an error. See the `TFLTensor` + * class documentation for more details on the life expectancy between the returned tensor and + * this interpreter. + */ +- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error; + +/** + * Returns the output tensor at the given index. + * + * @param index The index of an output tensor. + * @param error An optional error parameter populated when there is an error in looking up the + * output tensor. + * + * @return The output tensor at the given index. `nil` if there is an error. See the `TFLTensor` + * class documentation for more details on the life expectancy between the returned tensor and + * this interpreter. + */ +- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error; + +/** + * Resizes the input tensor at the given index to the specified shape (an array of positive unsigned + * integers). + * + * @param index The index of an input tensor. + * @param shape Shape that the given input tensor should be resized to. It should be an array of + * positive unsigned integer(s) containing the size of each dimension. + * @param error An optional error parameter populated when there is an error in resizing the input + * tensor. + * + * @return Whether the input tensor was resized successfully. Returns NO if an error occurred. + */ +- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index + toShape:(NSArray *)shape + error:(NSError **)error; + +/** + * Allocates memory for tensors. + * + * @param error An optional error parameter populated when there is an error in allocating memory. + * + * @return Whether memory allocation is successful. Returns NO if an error occurred. 
+ */ +- (BOOL)allocateTensorsWithError:(NSError **)error; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h b/tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h new file mode 100644 index 0000000000..6461fbf017 --- /dev/null +++ b/tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h @@ -0,0 +1,37 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +NS_ASSUME_NONNULL_BEGIN + +/** Custom configuration options for a TensorFlow Lite interpreter. */ +@interface TFLInterpreterOptions : NSObject + +/** + * Maximum number of threads that the interpreter should run on. Defaults to 0 (unspecified, letting + * TensorFlow Lite optimize the threading decision). + */ +@property(nonatomic) NSUInteger numberOfThreads; + +/** + * Initializes a new instance of `TFLInterpreterOptions`. + * + * @return A new instance of `TFLInterpreterOptions`. + */ +- (instancetype)init NS_DESIGNATED_INITIALIZER; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h b/tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h new file mode 100644 index 0000000000..3d5cf793c5 --- /dev/null +++ b/tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h @@ -0,0 +1,36 @@ +// Copyright 2018 Google Inc. All rights reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +NS_ASSUME_NONNULL_BEGIN + +/** + * Parameters for asymmetric quantization. Quantized values can be converted to float values using: + * `realValue = scale * (quantizedValue - zeroPoint)`. + */ +@interface TFLQuantizationParameters : NSObject + +/** Scale of asymmetric quantization. */ +@property(nonatomic, readonly) float scale; + +/** Zero point of asymmetric quantization. */ +@property(nonatomic, readonly) int32_t zeroPoint; + +/** Unavailable. */ +- (instancetype)init NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/apis/TFLTensor.h b/tensorflow/lite/experimental/objc/apis/TFLTensor.h new file mode 100644 index 0000000000..dc710abf4e --- /dev/null +++ b/tensorflow/lite/experimental/objc/apis/TFLTensor.h @@ -0,0 +1,111 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#import + +@class TFLQuantizationParameters; + +NS_ASSUME_NONNULL_BEGIN + +/** + * @enum TFLTensorDataType + * This enum specifies supported TensorFlow Lite tensor data types. + */ +typedef NS_ENUM(NSUInteger, TFLTensorDataType) { + /** Tensor data type not available. This indicates an error with the model. */ + TFLTensorDataTypeNoType, + + /** 32-bit single precision floating point. */ + TFLTensorDataTypeFloat32, + + /** 32-bit signed integer. */ + TFLTensorDataTypeInt32, + + /** 8-bit unsigned integer. */ + TFLTensorDataTypeUInt8, + + /** 64-bit signed integer. */ + TFLTensorDataTypeInt64, + + /** Boolean. */ + TFLTensorDataTypeBool, + + /** 16-bit signed integer. */ + TFLTensorDataTypeInt16, + + /** 8-bit signed integer. */ + TFLTensorDataTypeInt8, +}; + +/** + * An input or output tensor in a TensorFlow Lite model. + * + * @warning Each `TFLTensor` instance is associated with a `TFLInterpreter` instance. Multiple + * `TFLTensor` instances of the same TensorFlow Lite model are associated with the same + * `TFLInterpreter` instance. As long as a `TFLTensor` instance is still in use, its associated + * `TFLInterpreter` instance will not be deallocated. + */ +@interface TFLTensor : NSObject + +/** Name of the tensor. */ +@property(nonatomic, readonly, copy) NSString *name; + +/** Data type of the tensor. */ +@property(nonatomic, readonly) TFLTensorDataType dataType; + +/** Parameters for asymmetric quantization. `nil` if the tensor does not use quantization. */ +@property(nonatomic, readonly, nullable) TFLQuantizationParameters *quantizationParameters; + +/** Unavailable. */ +- (instancetype)init NS_UNAVAILABLE; + +/** + * Copies the given data into an input tensor. This is allowed only for an input tensor and only + * before the interpreter is invoked; otherwise an error will be returned. + * + * @param data The data to set. The byte size of the data must match what's required by the input + * tensor. 
+ * @param error An optional error parameter populated when there is an error in copying the data. + * + * @return Whether the data was copied into the input tensor successfully. Returns NO if an error + * occurred. + */ +- (BOOL)copyData:(NSData *)data error:(NSError **)error; + +/** + * Retrieves a copy of data in the tensor. For an output tensor, the data is only available after + * the interpreter invocation has successfully completed; otherwise an error will be returned. + * + * @param error An optional error parameter populated when there is an error in retrieving the data. + * + * @return A copy of data in the tensor. `nil` if there is an error in retrieving the data or the + * data is not available. + */ +- (nullable NSData *)dataWithError:(NSError **)error; + +/** + * Retrieves the shape of the tensor, an array of positive unsigned integers containing the size + * of each dimension. For example: the shape of [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] is + * [2, 2, 3] (i.e. an array of 2 arrays of 2 arrays of 3 numbers). + * + * @param error An optional error parameter populated when there is an error in retrieving the + * shape. + * + * @return The shape of the tensor. `nil` if there is an error in retrieving the shape. + */ +- (nullable NSArray *)shapeWithError:(NSError **)error; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.h b/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.h new file mode 100644 index 0000000000..ce8d50c896 --- /dev/null +++ b/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.h @@ -0,0 +1,40 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import + +#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h" + +NS_ASSUME_NONNULL_BEGIN + +/** Helper utility for error reporting. */ +@interface TFLErrorUtil : NSObject + +/** + * Creates and saves an interpreter error with the given error code and description. + * + * @param code Error code. + * @param description Error description. + * @param error Pointer to where to save the created error. If `nil`, no error will be saved. + */ ++ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code + description:(NSString *)description + error:(NSError **)error; + +/** Unavailable. */ +- (instancetype)init NS_UNAVAILABLE; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.m b/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.m new file mode 100644 index 0000000000..aa973c7800 --- /dev/null +++ b/tensorflow/lite/experimental/objc/sources/TFLErrorUtil.m @@ -0,0 +1,38 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#import "TFLErrorUtil.h" + +NS_ASSUME_NONNULL_BEGIN + +/** Error domain of TensorFlow Lite interpreter related errors. */ +static NSString *const TFLInterpreterErrorDomain = @"org.tensorflow.lite.interpreter"; + +@implementation TFLErrorUtil + +#pragma mark - Public + ++ (void)saveInterpreterErrorWithCode:(TFLInterpreterErrorCode)code + description:(NSString *)description + error:(NSError **)error { + if (error) { + *error = [NSError errorWithDomain:TFLInterpreterErrorDomain + code:code + userInfo:@{NSLocalizedDescriptionKey : description}]; + } +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/sources/TFLInterpreter+Internal.h b/tensorflow/lite/experimental/objc/sources/TFLInterpreter+Internal.h new file mode 100644 index 0000000000..9b900c4f05 --- /dev/null +++ b/tensorflow/lite/experimental/objc/sources/TFLInterpreter+Internal.h @@ -0,0 +1,63 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h" + +@class TFLTensor; + +NS_ASSUME_NONNULL_BEGIN + +@interface TFLInterpreter (Internal) + +/** + * Copies the given data into the input tensor at the given index. This is allowed only before the + * interpreter is invoked. + * + * @param data The data to set. 
The byte size of the data must match what's required by the input + * tensor at the given index. + * @param index An input tensor index. + * @param error An optional error parameter populated when there is an error in setting the data. + * + * @return Whether the data was copied into the input tensor at the given index successfully. + * Returns NO if an error occurred. + */ +- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error; + +/** + * Retrieves a copy of the data from the given tensor. For an output tensor, the interpreter + * invocation has to complete before the data can be retrieved. + * + * @param tensor A tensor. + * @param error An optional error parameter populated when there is an error in getting the data. + * + * @return The data of the given tensor. `nil` if there is an error or data is not available. + */ +- (nullable NSData *)dataFromTensor:(TFLTensor *)tensor error:(NSError **)error; + +/** + * Retrieves the shape of the given tensor, an array of positive unsigned integer(s) containing the + * size of each dimension. For example: shape of [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]] is + * [2, 2, 3]. + * + * @param tensor An input or output tensor. + * @param error An optional error parameter populated when there is an error in retrieving the + * shape. + * + * @return The shape of the tensor. `nil` if there is an error in retrieving the shape. + */ +- (nullable NSArray *)shapeOfTensor:(TFLTensor *)tensor error:(NSError **)error; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm b/tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm new file mode 100644 index 0000000000..a8ca982f6d --- /dev/null +++ b/tensorflow/lite/experimental/objc/sources/TFLInterpreter.mm @@ -0,0 +1,407 @@ +// Copyright 2018 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h" + +#import "TFLErrorUtil.h" +#import "TFLQuantizationParameters+Internal.h" +#import "TFLTensor+Internal.h" +#import "tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h" +#import "tensorflow/lite/experimental/objc/apis/TFLTensor.h" + +#include "tensorflow/lite/experimental/c/c_api.h" + +NS_ASSUME_NONNULL_BEGIN + +/** + * Error reporter for TFLInterpreter. + * + * @param user_data User data. Not used. + * @param format Error message which may contain argument formatting specifiers. + * @param args Values of the arguments in the error message. + */ +static void TFLInterpreterErrorReporter(void *user_data, const char *format, va_list args) { + NSLog(@"%@", [[NSString alloc] initWithFormat:@(format) arguments:args]); +} + +@interface TFLInterpreter () + +/** TFL_Interpreter backed by C API.
*/ +@property(nonatomic, nullable) TFL_Interpreter *interpreter; + +@end + +@implementation TFLInterpreter + +#pragma mark - NSObject + +- (void)dealloc { + TFL_DeleteInterpreter(_interpreter); +} + +#pragma mark - Public + +- (nullable instancetype)initWithModelPath:(NSString *)modelPath error:(NSError **)error { + return [self initWithModelPath:modelPath + options:[[TFLInterpreterOptions alloc] init] + error:error]; +} + +- (nullable instancetype)initWithModelPath:(NSString *)modelPath + options:(TFLInterpreterOptions *)options + error:(NSError **)error { + self = [super init]; + + if (self != nil) { + TFL_Model *model = nullptr; + TFL_InterpreterOptions *cOptions = nullptr; + + @try { + const char *modelPathCString = modelPath.UTF8String; + NSString *pathErrorString = + [NSString stringWithFormat:@"Cannot load model from path (%@).", modelPath]; + if (modelPathCString == nullptr) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel + description:pathErrorString + error:error]; + return nil; + } + + model = TFL_NewModelFromFile(modelPathCString); + if (model == nullptr) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToLoadModel + description:pathErrorString + error:error]; + return nil; + } + + cOptions = TFL_NewInterpreterOptions(); + if (cOptions == nullptr) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter + description:@"Failed to create the interpreter." + error:error]; + return nil; + } + + if (options.numberOfThreads > 0) { + TFL_InterpreterOptionsSetNumThreads(cOptions, (int32_t)options.numberOfThreads); + } + TFL_InterpreterOptionsSetErrorReporter(cOptions, TFLInterpreterErrorReporter, nullptr); + + _interpreter = TFL_NewInterpreter(model, cOptions); + if (_interpreter == nullptr) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter + description:@"Failed to create the interpreter."
+ error:error]; + return nil; + } + + _inputTensorCount = (NSUInteger)TFL_InterpreterGetInputTensorCount(_interpreter); + _outputTensorCount = (NSUInteger)TFL_InterpreterGetOutputTensorCount(_interpreter); + if (_inputTensorCount <= 0 || _outputTensorCount <= 0) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCreateInterpreter + description:@"Failed to create the interpreter." + error:error]; + return nil; + } + } @finally { + TFL_DeleteInterpreterOptions(cOptions); + TFL_DeleteModel(model); + } + } + + return self; +} + +- (BOOL)invokeWithError:(NSError **)error { + if (TFL_InterpreterInvoke(self.interpreter) != kTfLiteOk) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToInvoke + description:@"Failed to invoke the interpreter." + error:error]; + return NO; + } + + return YES; +} + +- (nullable TFLTensor *)inputTensorAtIndex:(NSUInteger)index error:(NSError **)error { + if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) { + return nil; + } + + return [self tensorOfType:TFLTensorTypeInput atIndex:index error:error]; +} + +- (nullable TFLTensor *)outputTensorAtIndex:(NSUInteger)index error:(NSError **)error { + if (![self isValidTensorIndex:index belowLimit:self.outputTensorCount error:error]) { + return nil; + } + + return [self tensorOfType:TFLTensorTypeOutput atIndex:index error:error]; +} + +- (BOOL)resizeInputTensorAtIndex:(NSUInteger)index + toShape:(NSArray *)shape + error:(NSError **)error { + if (![self isValidTensorIndex:index belowLimit:self.inputTensorCount error:error]) { + return NO; + } + + if (shape.count == 0) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape + description:@"Invalid shape. Must not be empty."
+ error:error]; + return NO; + } + + int cDimensions[shape.count];  // One slot per requested dimension, not per input tensor. + for (int dimIndex = 0; dimIndex < shape.count; ++dimIndex) { + int dimension = shape[dimIndex].intValue; + if (dimension <= 0) { + NSString *errorDescription = @"Invalid shape. Dimensions must be positive integers."; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidShape + description:errorDescription + error:error]; + return NO; + } + cDimensions[dimIndex] = dimension; + } + + if (TFL_InterpreterResizeInputTensor(self.interpreter, (int32_t)index, cDimensions, + (int32_t)shape.count) != kTfLiteOk) { + NSString *errorDescription = [NSString + stringWithFormat:@"Failed to resize input tensor at index (%lu).", (unsigned long)index]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToResizeInputTensor + description:errorDescription + error:error]; + return NO; + } + + return YES; +} + +- (BOOL)allocateTensorsWithError:(NSError **)error { + if (TFL_InterpreterAllocateTensors(self.interpreter) != kTfLiteOk) { + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToAllocateTensors + description:@"Failed to allocate memory for tensors."
+ error:error]; + return NO; + } + return YES; +} + +#pragma mark - TFLInterpreter (Internal) + +- (BOOL)copyData:(NSData *)data toInputTensorAtIndex:(NSUInteger)index error:(NSError **)error { + const TFL_Tensor *cTensor = [self cTensorOfType:TFLTensorTypeInput atIndex:index error:error]; + if (cTensor == nullptr) { + return NO; + } + + NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(cTensor); + if (data.length != byteSize) { + NSString *errorDescription = [NSString + stringWithFormat:@"Input tensor at index (%lu) expects data size (%lu), but got (%lu).", + (unsigned long)index, (unsigned long)byteSize, (unsigned long)data.length]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidInputByteSize + description:errorDescription + error:error]; + return NO; + } + + if (TFL_TensorCopyFromBuffer((TFL_Tensor *)cTensor, data.bytes, data.length) != kTfLiteOk) { + NSString *errorDescription = + [NSString stringWithFormat:@"Failed to copy data into input tensor at index (%lu).", + (unsigned long)index]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToCopyDataToInputTensor + description:errorDescription + error:error]; + return NO; + } + + return YES; +} + +- (nullable NSData *)dataFromTensor:(TFLTensor *)tensor error:(NSError **)error { + const TFL_Tensor *cTensor = [self cTensorOfType:tensor.type atIndex:tensor.index error:error]; + if (cTensor == nullptr) { + return nil; + } + + void *bytes = TFL_TensorData(cTensor); + NSUInteger byteSize = (NSUInteger)TFL_TensorByteSize(cTensor); + if (bytes == nullptr || byteSize == 0) { + NSString *tensorType = [TFLTensor stringForTensorType:tensor.type]; + NSString *errorDescription = + [NSString stringWithFormat:@"Failed to get data from %@ tensor at index (%lu).", tensorType, + (unsigned long)tensor.index]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetDataFromTensor + description:errorDescription + error:error]; + return nil; + } + + return [NSData
dataWithBytes:bytes length:byteSize]; +} + +- (nullable NSArray *)shapeOfTensor:(TFLTensor *)tensor error:(NSError **)error { + const TFL_Tensor *cTensor = [self cTensorOfType:tensor.type atIndex:tensor.index error:error]; + if (cTensor == nullptr) { + return nil; + } + + NSString *tensorType = [TFLTensor stringForTensorType:tensor.type]; + int32_t rank = TFL_TensorNumDims(cTensor); + if (rank <= 0) { + NSString *errorDescription = + [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid rank (%d).", tensorType, + (unsigned long)tensor.index, rank]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensor + description:errorDescription + error:error]; + return nil; + } + + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:rank]; + for (int32_t dimIndex = 0; dimIndex < rank; dimIndex++) { + int32_t dimension = TFL_TensorDim(cTensor, dimIndex); + if (dimension <= 0) { + NSString *errorDescription = + [NSString stringWithFormat:@"%@ tensor at index (%lu) has invalid %d-th dimension (%d).", + tensorType, (unsigned long)tensor.index, dimIndex, dimension]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensor + description:errorDescription + error:error]; + return nil; + } + shape[dimIndex] = @((NSUInteger)dimension); + } + + return shape; +} + +#pragma mark - Private + +- (const TFL_Tensor *)cTensorOfType:(TFLTensorType)type + atIndex:(NSUInteger)index + error:(NSError **)error { + const TFL_Tensor *tensor = nullptr; + + switch (type) { + case TFLTensorTypeInput: + tensor = TFL_InterpreterGetInputTensor(self.interpreter, (int32_t)index); + break; + case TFLTensorTypeOutput: + tensor = TFL_InterpreterGetOutputTensor(self.interpreter, (int32_t)index); + break; + } + + if (tensor == nullptr) { + NSString *tensorType = [TFLTensor stringForTensorType:type]; + NSString *errorDescription = + [NSString stringWithFormat:@"Failed to get %@ tensor at index (%lu).", tensorType, + (unsigned long)index]; + [TFLErrorUtil
saveInterpreterErrorWithCode:TFLInterpreterErrorCodeFailedToGetTensor + description:errorDescription + error:error]; + } + + return tensor; +} + +- (nullable TFLTensor *)tensorOfType:(TFLTensorType)type + atIndex:(NSUInteger)index + error:(NSError **)error { + const TFL_Tensor *tensor = [self cTensorOfType:type atIndex:index error:error]; + + if (tensor == nullptr) { + return nil; + } + + NSString *tensorType = [TFLTensor stringForTensorType:type]; + const char *cName = TFL_TensorName(tensor); + if (cName == nullptr) { + NSString *errorDescription = + [NSString stringWithFormat:@"Failed to get name of %@ tensor at index (%lu).", tensorType, + (unsigned long)index]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensor + description:errorDescription + error:error]; + return nil; + } + NSString *name = [NSString stringWithUTF8String:cName]; + + TFLTensorDataType dataType = [self tensorDataTypeFromCTensorType:TFL_TensorType(tensor)]; + + TFL_QuantizationParams cParams = TFL_TensorQuantizationParams(tensor); + TFLQuantizationParameters *quantizationParams; + + // TODO(b/119735362): Update this check once the TFL_QuantizationParams struct has a mode. + if (cParams.scale != 0.0) { + quantizationParams = [[TFLQuantizationParameters alloc] initWithScale:cParams.scale + zeroPoint:cParams.zero_point]; + } + + // Quantization parameters are only created above when the C API reports a nonzero scale.
+ return [[TFLTensor alloc] initWithInterpreter:self + type:type + index:index + name:name + dataType:dataType + quantizationParameters:quantizationParams]; +} + +- (TFLTensorDataType)tensorDataTypeFromCTensorType:(TFL_Type)cTensorType { + switch (cTensorType) { + case kTfLiteFloat32: + return TFLTensorDataTypeFloat32; + case kTfLiteInt32: + return TFLTensorDataTypeInt32; + case kTfLiteUInt8: + return TFLTensorDataTypeUInt8; + case kTfLiteInt8: + return TFLTensorDataTypeInt8; + case kTfLiteInt64: + return TFLTensorDataTypeInt64; + case kTfLiteBool: + return TFLTensorDataTypeBool; + case kTfLiteInt16: + return TFLTensorDataTypeInt16; + case kTfLiteNoType: + case kTfLiteString: + case kTfLiteComplex64: + // kTfLiteString and kTfLiteComplex64 are not supported in TensorFlow Lite Objc API. + return TFLTensorDataTypeNoType; + } +} + +- (BOOL)isValidTensorIndex:(NSUInteger)index + belowLimit:(NSUInteger)totalTensorCount + error:(NSError **)error { + if (index >= totalTensorCount) { + NSString *errorDescription = + [NSString stringWithFormat:@"Invalid tensor index (%lu) exceeds max (%lu).", + (unsigned long)index, (unsigned long)(totalTensorCount - 1)]; + [TFLErrorUtil saveInterpreterErrorWithCode:TFLInterpreterErrorCodeInvalidTensorIndex + description:errorDescription + error:error]; + return NO; + } + + return YES; +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/sources/TFLInterpreterOptions.m b/tensorflow/lite/experimental/objc/sources/TFLInterpreterOptions.m new file mode 100644 index 0000000000..d129befeca --- /dev/null +++ b/tensorflow/lite/experimental/objc/sources/TFLInterpreterOptions.m @@ -0,0 +1,30 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h" + +NS_ASSUME_NONNULL_BEGIN + +@implementation TFLInterpreterOptions + +#pragma mark - Public + +- (instancetype)init { + self = [super init]; + return self; +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters+Internal.h b/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters+Internal.h new file mode 100644 index 0000000000..37d9ef0bb4 --- /dev/null +++ b/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters+Internal.h @@ -0,0 +1,33 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface TFLQuantizationParameters (Internal) + +/** + * Initializes a `TFLQuantizationParameters` instance with the given scale and zero point. + * + * @param scale Scale of asymmetric quantization. 
+ * @param zeroPoint Zero point of asymmetric quantization. + * + * @return A new instance of `TFLQuantizationParameters` with the given scale and zero point. + */ +- (instancetype)initWithScale:(float)scale zeroPoint:(int32_t)zeroPoint; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters.m b/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters.m new file mode 100644 index 0000000000..44cb90d332 --- /dev/null +++ b/tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters.m @@ -0,0 +1,36 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h" + +#import "TFLQuantizationParameters+Internal.h" + +NS_ASSUME_NONNULL_BEGIN + +@implementation TFLQuantizationParameters + +#pragma mark - TFLQuantizationParameters (Internal) + +- (instancetype)initWithScale:(float)scale zeroPoint:(int32_t)zeroPoint { + self = [super init]; + if (self != nil) { + _scale = scale; + _zeroPoint = zeroPoint; + } + return self; +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/sources/TFLTensor+Internal.h b/tensorflow/lite/experimental/objc/sources/TFLTensor+Internal.h new file mode 100644 index 0000000000..3d5c51caab --- /dev/null +++ b/tensorflow/lite/experimental/objc/sources/TFLTensor+Internal.h @@ -0,0 +1,74 @@ +// Copyright 2018 Google Inc.
All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "tensorflow/lite/experimental/objc/apis/TFLTensor.h" + +@class TFLInterpreter; + +NS_ASSUME_NONNULL_BEGIN + +/** + * @enum TFLTensorType + * This enum specifies input or output tensor types. + */ +typedef NS_ENUM(NSUInteger, TFLTensorType) { + /** Input tensor type. */ + TFLTensorTypeInput, + + /** Output tensor type. */ + TFLTensorTypeOutput, +}; + +@interface TFLTensor (Internal) + +/** Input or output tensor type. */ +@property(nonatomic, readonly) TFLTensorType type; + +/** Index of the tensor. */ +@property(nonatomic, readonly) NSUInteger index; + +/** + * Initializes a `TFLTensor` with the given interpreter, tensor type, index, name, data type, and + * quantization parameters. + * + * @param interpreter Interpreter backing the tensor. + * @param type Input or output tensor type. + * @param index Index of the tensor. + * @param name Name of the tensor. + * @param dataType Data type of the tensor. + * @param quantizationParameters Quantization parameters of the tensor. `nil` if the tensor does not + * use quantization. + * + * @return A new instance of `TFLTensor` with the given type, index, name, data type, and + * quantization parameters.
+ */ +- (instancetype)initWithInterpreter:(TFLInterpreter *)interpreter + type:(TFLTensorType)type + index:(NSUInteger)index + name:(NSString *)name + dataType:(TFLTensorDataType)dataType + quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters; + +/** + * Returns the string name of the given input or output tensor type. + * + * @param type Input or output tensor type. + * + * @return The string name of the given input or output tensor type. + */ ++ (NSString *)stringForTensorType:(TFLTensorType)type; + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/sources/TFLTensor.m b/tensorflow/lite/experimental/objc/sources/TFLTensor.m new file mode 100644 index 0000000000..2eaebfd6be --- /dev/null +++ b/tensorflow/lite/experimental/objc/sources/TFLTensor.m @@ -0,0 +1,103 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "tensorflow/lite/experimental/objc/apis/TFLTensor.h" + +#import "TFLErrorUtil.h" +#import "TFLInterpreter+Internal.h" +#import "TFLTensor+Internal.h" + +#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h" + +NS_ASSUME_NONNULL_BEGIN + +// String names of input or output tensor types. +static NSString *const kTFLInputTensorTypeString = @"input"; +static NSString *const kTFLOutputTensorTypeString = @"output"; + +@interface TFLTensor () + +// Redefines readonly properties.
+@property(nonatomic) TFLTensorType type; +@property(nonatomic) NSUInteger index; +@property(nonatomic, copy) NSString *name; +@property(nonatomic) TFLTensorDataType dataType; +@property(nonatomic, nullable) TFLQuantizationParameters *quantizationParameters; + +/** + * The backing interpreter. It's a strong reference to ensure that the interpreter is never released + * before this tensor is released. + * + * @warning Never let the interpreter hold a strong reference to the tensor to avoid retain cycles. + */ +@property(nonatomic) TFLInterpreter *interpreter; + +@end + +@implementation TFLTensor + +#pragma mark - Public + +- (BOOL)copyData:(NSData *)data error:(NSError **)error { + if (self.type == TFLTensorTypeOutput) { + [TFLErrorUtil + saveInterpreterErrorWithCode:TFLInterpreterErrorCodeCopyDataToOutputTensorNotAllowed + description:@"Cannot copy data into an output tensor." + error:error]; + return NO; + } + + return [self.interpreter copyData:data toInputTensorAtIndex:self.index error:error]; +} + +- (nullable NSData *)dataWithError:(NSError **)error { + return [self.interpreter dataFromTensor:self error:error]; +} + +- (nullable NSArray *)shapeWithError:(NSError **)error { + return [self.interpreter shapeOfTensor:self error:error]; +} + +#pragma mark - TFLTensor (Internal) + +- (instancetype)initWithInterpreter:(TFLInterpreter *)interpreter + type:(TFLTensorType)type + index:(NSUInteger)index + name:(NSString *)name + dataType:(TFLTensorDataType)dataType + quantizationParameters:(nullable TFLQuantizationParameters *)quantizationParameters { + self = [super init]; + if (self != nil) { + _interpreter = interpreter; + _type = type; + _index = index; + _name = [name copy]; + _dataType = dataType; + _quantizationParameters = quantizationParameters; + } + return self; +} + ++ (NSString *)stringForTensorType:(TFLTensorType)type { + switch (type) { + case TFLTensorTypeInput: + return kTFLInputTensorTypeString; + case TFLTensorTypeOutput: + return 
kTFLOutputTensorTypeString; + } +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m b/tensorflow/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m new file mode 100644 index 0000000000..00b800d6af --- /dev/null +++ b/tensorflow/lite/experimental/objc/tests/TFLInterpreterOptionsTests.m @@ -0,0 +1,49 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h" + +#import + +NS_ASSUME_NONNULL_BEGIN + +/** + * Unit tests for TFLInterpreterOptions. 
+ */ +@interface TFLInterpreterOptionsTests : XCTestCase +@end + +@implementation TFLInterpreterOptionsTests + +#pragma mark - Tests + +- (void)testInit { + TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; + XCTAssertNotNil(options); + XCTAssertEqual(options.numberOfThreads, 0); +} + +- (void)testSetNumberOfThread { + TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; + options.numberOfThreads = 2; + XCTAssertEqual(options.numberOfThreads, 2); + options.numberOfThreads = 0; + XCTAssertEqual(options.numberOfThreads, 0); + options.numberOfThreads = 3; + XCTAssertEqual(options.numberOfThreads, 3); +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/tests/TFLInterpreterTests.m b/tensorflow/lite/experimental/objc/tests/TFLInterpreterTests.m new file mode 100644 index 0000000000..eefa9b9f05 --- /dev/null +++ b/tensorflow/lite/experimental/objc/tests/TFLInterpreterTests.m @@ -0,0 +1,358 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "tensorflow/lite/experimental/objc/apis/TFLInterpreter.h" + +#import + +#import "tensorflow/lite/experimental/objc/apis/TFLInterpreterOptions.h" +#import "tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h" +#import "tensorflow/lite/experimental/objc/apis/TFLTensor.h" + +NS_ASSUME_NONNULL_BEGIN + +/** Float model resource name. 
*/ +static NSString *const kAddFloatModelResourceName = @"add"; + +/** Quantized model resource name. */ +static NSString *const kAddQuantizedModelResourceName = @"add_quantized"; + +/** Model resource type. */ +static NSString *const kAddModelResourceType = @"bin"; + +/** Rank of the input and output tensor in the Add model. */ +static const NSUInteger kAddModelTensorRank = 1U; + +/** Size of the first (and only) dimension of the input and output tensor in the Add model. */ +static const NSUInteger kAddModelTensorFirstDimensionSize = 2U; + +/** Quantization scale of the quantized model. */ +static const float kAddQuantizedModelScale = 0.003922F; + +/** Quantization zero point of the quantized model. */ +static const int32_t kAddQuantizedModelZeroPoint = 0; + +/** Invalid input tensor index. */ +static const NSUInteger kInvalidInputTensorIndex = 1U; + +/** Invalid output tensor index. */ +static const NSUInteger kInvalidOutputTensorIndex = 1U; + +/** Accuracy used in comparing floating-point numbers. */ +static const float kTestAccuracy = 1E-5F; + +/** + * Unit tests for TFLInterpreter. + */ +@interface TFLInterpreterTests : XCTestCase + +/** Absolute path of the Add float model resource. */ +@property(nonatomic, nullable) NSString *floatModelPath; + +/** Default interpreter using the Add model. 
*/ +@property(nonatomic, nullable) TFLInterpreter *interpreter; + +@end + +@implementation TFLInterpreterTests + +#pragma mark - XCTestCase + +- (void)setUp { + [super setUp]; + + NSBundle *bundle = [NSBundle bundleForClass:[self class]]; + self.floatModelPath = [bundle pathForResource:kAddFloatModelResourceName + ofType:kAddModelResourceType]; + NSError *error; + self.interpreter = [[TFLInterpreter alloc] initWithModelPath:self.floatModelPath error:&error]; + XCTAssertNil(error); + XCTAssertNotNil(self.interpreter); + XCTAssertTrue([self.interpreter allocateTensorsWithError:nil]); +} + +- (void)tearDown { + self.floatModelPath = nil; + self.interpreter = nil; + + [super tearDown]; +} + +#pragma mark - Tests + +- (void)testSuccessfulFullRunAddFloatModel { + // Shape for both input and output tensor. + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; + shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize]; + + // Creates the interpreter options. + TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; + XCTAssertNotNil(options); + options.numberOfThreads = 2; + + // Creates the interpreter. + NSError *error; + TFLInterpreter *customInterpreter = [[TFLInterpreter alloc] initWithModelPath:self.floatModelPath + options:options + error:&error]; + XCTAssertNil(error); + XCTAssertNotNil(customInterpreter); + + // Allocates memory for tensors. + XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]); + XCTAssertNil(error); + + // Verifies input and output tensor counts. + XCTAssertEqual(customInterpreter.inputTensorCount, 1); + XCTAssertEqual(customInterpreter.outputTensorCount, 1); + + // Resizes the input tensor. + XCTAssertTrue([customInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]); + XCTAssertNil(error); + + // Re-allocates memory for tensors. 
+ XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]); + XCTAssertNil(error); + + // Verifies the input tensor. + TFLTensor *inputTensor = [customInterpreter inputTensorAtIndex:0 error:&error]; + XCTAssertNotNil(inputTensor); + XCTAssertNil(error); + XCTAssertTrue([inputTensor.name isEqualToString:@"input"]); + XCTAssertEqual(inputTensor.dataType, TFLTensorDataTypeFloat32); + NSArray *inputTensorShape = [inputTensor shapeWithError:&error]; + XCTAssertNil(error); + XCTAssertTrue([shape isEqualToArray:inputTensorShape]); + + // Copies the input data. + NSMutableData *inputData = [NSMutableData dataWithCapacity:0]; + float one = 1.f; + float three = 3.f; + [inputData appendBytes:&one length:sizeof(float)]; + [inputData appendBytes:&three length:sizeof(float)]; + XCTAssertTrue([inputTensor copyData:inputData error:&error]); + XCTAssertNil(error); + + // Invokes the interpreter. + XCTAssertTrue([customInterpreter invokeWithError:&error]); + XCTAssertNil(error); + + // Verifies the output tensor. + TFLTensor *outputTensor = [customInterpreter outputTensorAtIndex:0 error:&error]; + XCTAssertNotNil(outputTensor); + XCTAssertNil(error); + XCTAssertTrue([outputTensor.name isEqualToString:@"output"]); + XCTAssertEqual(outputTensor.dataType, TFLTensorDataTypeFloat32); + NSArray *outputTensorShape = [outputTensor shapeWithError:&error]; + XCTAssertNil(error); + XCTAssertTrue([shape isEqualToArray:outputTensorShape]); + + // Tries to query an invalid output tensor index. + TFLTensor *invalidOutputTensor = [customInterpreter outputTensorAtIndex:kInvalidOutputTensorIndex + error:&error]; + XCTAssertNil(invalidOutputTensor); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex); + + // Gets the output tensor data. 
+ error = nil; + NSData *outputData = [outputTensor dataWithError:&error]; + XCTAssertNotNil(outputData); + XCTAssertNil(error); + float output[kAddModelTensorFirstDimensionSize]; + [outputData getBytes:output length:(sizeof(float) * kAddModelTensorFirstDimensionSize)]; + XCTAssertEqualWithAccuracy(output[0], 3.f, kTestAccuracy); + XCTAssertEqualWithAccuracy(output[1], 9.f, kTestAccuracy); +} + +- (void)testSuccessfulFullRunQuantizedModel { + // Shape for both input and output tensor. + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; + shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize]; + + // Creates the interpreter options. + TFLInterpreterOptions *options = [[TFLInterpreterOptions alloc] init]; + XCTAssertNotNil(options); + options.numberOfThreads = 2; + + NSBundle *bundle = [NSBundle bundleForClass:[self class]]; + NSString *quantizedModelPath = [bundle pathForResource:kAddQuantizedModelResourceName + ofType:kAddModelResourceType]; + + // Creates the interpreter. + NSError *error; + TFLInterpreter *customInterpreter = + [[TFLInterpreter alloc] initWithModelPath:quantizedModelPath options:options error:&error]; + XCTAssertNil(error); + XCTAssertNotNil(customInterpreter); + + // Allocates memory for tensors. + XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]); + XCTAssertNil(error); + + // Verifies input and output tensor counts. + XCTAssertEqual(customInterpreter.inputTensorCount, 1); + XCTAssertEqual(customInterpreter.outputTensorCount, 1); + + // Resizes the input tensor. + XCTAssertTrue([customInterpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]); + XCTAssertNil(error); + + // Re-allocates memory for tensors. + XCTAssertTrue([customInterpreter allocateTensorsWithError:&error]); + XCTAssertNil(error); + + // Verifies the input tensor. 
+ TFLTensor *inputTensor = [customInterpreter inputTensorAtIndex:0 error:&error]; + XCTAssertNotNil(inputTensor); + XCTAssertNil(error); + XCTAssertTrue([inputTensor.name isEqualToString:@"input"]); + XCTAssertEqual(inputTensor.dataType, TFLTensorDataTypeUInt8); + XCTAssertEqualWithAccuracy(inputTensor.quantizationParameters.scale, kAddQuantizedModelScale, + kTestAccuracy); + XCTAssertEqual(inputTensor.quantizationParameters.zeroPoint, kAddQuantizedModelZeroPoint); + NSArray *inputTensorShape = [inputTensor shapeWithError:&error]; + XCTAssertNil(error); + XCTAssertTrue([shape isEqualToArray:inputTensorShape]); + + // Copies the input data. + NSMutableData *inputData = [NSMutableData dataWithCapacity:0]; + uint8_t one = 1; + uint8_t three = 3; + [inputData appendBytes:&one length:sizeof(uint8_t)]; + [inputData appendBytes:&three length:sizeof(uint8_t)]; + XCTAssertTrue([inputTensor copyData:inputData error:&error]); + XCTAssertNil(error); + + // Invokes the interpreter. + XCTAssertTrue([customInterpreter invokeWithError:&error]); + XCTAssertNil(error); + + // Verifies the output tensor. + TFLTensor *outputTensor = [customInterpreter outputTensorAtIndex:0 error:&error]; + XCTAssertNotNil(outputTensor); + XCTAssertNil(error); + XCTAssertTrue([outputTensor.name isEqualToString:@"output"]); + XCTAssertEqual(outputTensor.dataType, TFLTensorDataTypeUInt8); + XCTAssertEqualWithAccuracy(outputTensor.quantizationParameters.scale, kAddQuantizedModelScale, + kTestAccuracy); + XCTAssertEqual(outputTensor.quantizationParameters.zeroPoint, kAddQuantizedModelZeroPoint); + NSArray *outputTensorShape = [outputTensor shapeWithError:&error]; + XCTAssertNil(error); + XCTAssertTrue([shape isEqualToArray:outputTensorShape]); + + // Tries to query an invalid output tensor index. 
+ TFLTensor *invalidOutputTensor = [customInterpreter outputTensorAtIndex:kInvalidOutputTensorIndex + error:&error]; + XCTAssertNil(invalidOutputTensor); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex); + + // Gets the output tensor data. + error = nil; + NSData *outputData = [outputTensor dataWithError:&error]; + XCTAssertNotNil(outputData); + XCTAssertNil(error); + uint8_t output[kAddModelTensorFirstDimensionSize]; + [outputData getBytes:output length:(sizeof(uint8_t) * kAddModelTensorFirstDimensionSize)]; + XCTAssertEqual(output[0], 3); + XCTAssertEqual(output[1], 9); +} + +- (void)testInitWithModelPath_invalidPath { + // Shape for both input and output tensor. + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; + shape[0] = [NSNumber numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize]; + + // Creates the interpreter. + NSError *error; + TFLInterpreter *brokenInterpreter = [[TFLInterpreter alloc] initWithModelPath:@"InvalidPath" + error:&error]; + XCTAssertNil(brokenInterpreter); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToLoadModel); +} + +- (void)testInvoke_beforeAllocation { + NSError *error; + TFLInterpreter *interpreterWithoutAllocation = + [[TFLInterpreter alloc] initWithModelPath:self.floatModelPath error:&error]; + XCTAssertNotNil(interpreterWithoutAllocation); + XCTAssertNil(error); + + XCTAssertFalse([interpreterWithoutAllocation invokeWithError:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeFailedToInvoke); +} + +- (void)testInputTensorAtIndex_invalidIndex { + NSError *error; + TFLTensor *inputTensor = [self.interpreter inputTensorAtIndex:kInvalidInputTensorIndex + error:&error]; + XCTAssertNil(inputTensor); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex); +} + +- (void)testResizeInputTensorAtIndex_invalidIndex { + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; + shape[0] = [NSNumber 
numberWithUnsignedInteger:kAddModelTensorFirstDimensionSize]; + NSError *error; + XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:kInvalidInputTensorIndex + toShape:shape + error:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidTensorIndex); +} + +- (void)testResizeInputTensorAtIndex_emptyShape { + NSMutableArray *emptyShape = [NSMutableArray arrayWithCapacity:0]; + NSError *error; + XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:emptyShape error:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape); +} + +- (void)testResizeInputTensorAtIndex_zeroDimensionSize { + NSMutableArray *shape = [NSMutableArray arrayWithCapacity:kAddModelTensorRank]; + shape[0] = [NSNumber numberWithUnsignedInteger:0]; + NSError *error; + XCTAssertFalse([self.interpreter resizeInputTensorAtIndex:0 toShape:shape error:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidShape); +} + +- (void)testCopyDataToInputTensorAtIndex_invalidInputDataByteSize { + NSMutableData *inputData = [NSMutableData dataWithCapacity:0]; + float one = 1.f; + float three = 3.f; + [inputData appendBytes:&one length:sizeof(float)]; + [inputData appendBytes:&three length:(sizeof(float) - 1)]; + NSError *error; + TFLTensor *inputTensor = [self.interpreter inputTensorAtIndex:0 error:&error]; + XCTAssertNotNil(inputTensor); + XCTAssertNil(error); + XCTAssertFalse([inputTensor copyData:inputData error:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeInvalidInputByteSize); +} + +- (void)testCopyDataToOutputTensorAtIndex_notAllowed { + NSMutableData *data = [NSMutableData dataWithCapacity:0]; + float one = 1.f; + float three = 3.f; + [data appendBytes:&one length:sizeof(float)]; + [data appendBytes:&three length:(sizeof(float) - 1)]; + NSError *error; + TFLTensor *outputTensor = [self.interpreter outputTensorAtIndex:0 error:&error]; + XCTAssertNotNil(outputTensor); + XCTAssertNil(error); + XCTAssertFalse([outputTensor 
copyData:data error:&error]); + XCTAssertEqual(error.code, TFLInterpreterErrorCodeCopyDataToOutputTensorNotAllowed); +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/objc/tests/TFLQuantizationParametersTests.m b/tensorflow/lite/experimental/objc/tests/TFLQuantizationParametersTests.m new file mode 100644 index 0000000000..239e0bcb0d --- /dev/null +++ b/tensorflow/lite/experimental/objc/tests/TFLQuantizationParametersTests.m @@ -0,0 +1,48 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#import "tensorflow/lite/experimental/objc/apis/TFLQuantizationParameters.h" + +#import + +#import "tensorflow/lite/experimental/objc/sources/TFLQuantizationParameters+Internal.h" + +NS_ASSUME_NONNULL_BEGIN + +/** Test scale of quantization parameters. */ +static const float kTestScale = 2.0; + +/** Test zero point of quantization parameters. */ +static const int32_t kTestZeroPoint = 128; + +/** + * Unit tests for TFLQuantizationParameters. 
+ */ +@interface TFLQuantizationParametersTests : XCTestCase +@end + +@implementation TFLQuantizationParametersTests + +#pragma mark - Tests + +- (void)testInitWithScaleAndZeroPoint { + TFLQuantizationParameters *params = + [[TFLQuantizationParameters alloc] initWithScale:kTestScale zeroPoint:kTestZeroPoint]; + XCTAssertEqual(params.scale, kTestScale); + XCTAssertEqual(params.zeroPoint, kTestZeroPoint); +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/tensorflow/lite/experimental/swift/BUILD.apple b/tensorflow/lite/experimental/swift/BUILD.apple new file mode 100644 index 0000000000..53bcb0ecbd --- /dev/null +++ b/tensorflow/lite/experimental/swift/BUILD.apple @@ -0,0 +1,101 @@ +# TensorFlow Lite for Swift. + +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("@build_bazel_rules_apple//apple:ios.bzl", "ios_application", "ios_unit_test") +load("@build_bazel_rules_swift//swift:swift.bzl", "swift_library") + +MINIMUM_OS_VERSION = "9.0" + +SWIFT_COPTS = [ + "-wmo", +] + +swift_library( + name = "TensorFlowLite", + srcs = glob(["Sources/*.swift"]), + copts = SWIFT_COPTS, + module_name = "TensorFlowLite", + tags = ["manual"], + deps = [ + "//tensorflow/lite/experimental/c:c_api", + ], +) + +ios_unit_test( + name = "TensorFlowLiteTests", + size = "small", + minimum_os_version = MINIMUM_OS_VERSION, + tags = [ + "manual", + # DISABLED: Following sanitizer tests are not supported by iOS test targets. 
+ "noasan", + "nomsan", + "notsan", + ], + deps = [":TensorFlowLiteTestsLib"], +) + +swift_library( + name = "TensorFlowLiteTestsLib", + testonly = 1, + srcs = glob(["Tests/*.swift"]), + copts = SWIFT_COPTS, + tags = ["manual"], + deps = [ + ":TensorFlowLite", + ":TestResources", + ], +) + +objc_library( + name = "TestResources", + resources = [ + "//tensorflow/lite:testdata/add.bin", + "//tensorflow/lite:testdata/add_quantized.bin", + "//tensorflow/lite:testdata/multi_add.bin", + ], + tags = ["manual"], +) + +ios_application( + name = "TensorFlowLiteApp", + app_icons = glob(["TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/AppIcon.appiconset/**"]), + bundle_id = "com.tensorflow.lite.swift.TensorFlowLite", + families = [ + "ipad", + "iphone", + ], + infoplists = ["TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Info.plist"], + launch_storyboard = "TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/LaunchScreen.storyboard", + minimum_os_version = MINIMUM_OS_VERSION, + sdk_frameworks = [ + "CoreGraphics", + ], + tags = ["manual"], + deps = [":TensorFlowLiteAppLib"], +) + +swift_library( + name = "TensorFlowLiteAppLib", + srcs = glob(["TestApps/TensorFlowLiteApp/TensorFlowLiteApp/*.swift"]), + module_name = "TensorFlowLiteAppLib", + tags = ["manual"], + deps = [ + ":TensorFlowLite", + ":TensorFlowLiteAppResources", + ], +) + +objc_library( + name = "TensorFlowLiteAppResources", + storyboards = glob([ + "TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/*.storyboard", + ]), + tags = ["manual"], + deps = [":TestResources"], +) diff --git a/tensorflow/lite/experimental/swift/LICENSE b/tensorflow/lite/experimental/swift/LICENSE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/tensorflow/lite/experimental/swift/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/tensorflow/lite/experimental/swift/README.md b/tensorflow/lite/experimental/swift/README.md new file mode 100644 index 0000000000..e3b195475d --- /dev/null +++ b/tensorflow/lite/experimental/swift/README.md @@ -0,0 +1,76 @@ +# TensorFlow Lite for Swift + +[TensorFlow Lite](https://www.tensorflow.org/lite/) is TensorFlow's lightweight +solution for Swift developers. It enables low-latency inference of on-device +machine learning models with a small binary size and fast performance supporting +hardware acceleration. 
+ +## Getting Started + +### Bazel + +In your `BUILD` file, add the `TensorFlowLite` dependency: + +```python +swift_library( + deps = [ + "//tensorflow/lite/experimental/swift:TensorFlowLite", + ], +) +``` + +In your Swift files, import the module: + +```swift +import TensorFlowLite +``` + +If you would like to build the Swift TensorFlow Lite library using Bazel on Apple +platforms, clone or download the [TensorFlow GitHub repo](https://github.com/tensorflow/tensorflow), +then navigate to the root `tensorflow` directory and execute the `configure.py` script: + +```shell +python configure.py +``` + +Follow the prompts and when asked to configure the Bazel rules for Apple +platforms, enter `y`. + +Build the `TensorFlowLite` Swift library target: + +```shell +bazel build tensorflow/lite/experimental/swift:TensorFlowLite +``` + +Build the `TensorFlowLiteTests` target: + +```shell +bazel test tensorflow/lite/experimental/swift:TensorFlowLiteTests --swiftcopt=-enable-testing +``` + +### Tulsi + +Open the `TensorFlowLite.tulsiproj` using the [TulsiApp](https://github.com/bazelbuild/tulsi) or by +running the [`generate_xcodeproj.sh`](https://github.com/bazelbuild/tulsi/blob/master/src/tools/generate_xcodeproj.sh) +script: + +```shell +generate_xcodeproj.sh --genconfig tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj:TensorFlowLite --outputfolder ~/path/to/generated/TensorFlowLite.xcodeproj +``` + +### CocoaPods + +Add the following to your `Podfile`: + +```ruby +use_frameworks! +pod 'TensorFlowLiteSwift' +``` + +Then, run `pod install`. + +In your Swift files, import the module: + +```swift +import TensorFlowLite +``` diff --git a/tensorflow/lite/experimental/swift/Sources/Interpreter.swift b/tensorflow/lite/experimental/swift/Sources/Interpreter.swift new file mode 100644 index 0000000000..a14b5966b1 --- /dev/null +++ b/tensorflow/lite/experimental/swift/Sources/Interpreter.swift @@ -0,0 +1,265 @@ +// Copyright 2018 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation +import TensorFlowLiteCAPI + +/// A TensorFlow Lite interpreter that performs inference from a given model. +public final class Interpreter { + + /// The `TFL_Interpreter` C pointer type represented as an `UnsafePointer`. + private typealias CInterpreter = OpaquePointer + + /// Total number of input tensors associated with the model. + public var inputTensorCount: Int { + return Int(TFL_InterpreterGetInputTensorCount(cInterpreter)) + } + + /// Total number of output tensors associated with the model. + public var outputTensorCount: Int { + return Int(TFL_InterpreterGetOutputTensorCount(cInterpreter)) + } + + /// The underlying `TFL_Interpreter` C pointer. + private var cInterpreter: CInterpreter? + + /// Creates a new model interpreter instance. + /// + /// - Parameters: + /// - modelPath: Local file path to a TensorFlow Lite model. + /// - options: Custom configurations for the interpreter. The default is `nil` indicating that + /// interpreter will determine the configuration options. + /// - Throws: An error if the model could not be loaded or the interpreter could not be created. + public init(modelPath: String, options: InterpreterOptions? = nil) throws { + guard let model = Model(filePath: modelPath) else { throw InterpreterError.failedToLoadModel } + + let cInterpreterOptions: OpaquePointer? 
= try options.map { options in + guard let cOptions = TFL_NewInterpreterOptions() else { + throw InterpreterError.failedToCreateInterpreter + } + if let threadCount = options.threadCount, threadCount > 0 { + TFL_InterpreterOptionsSetNumThreads(cOptions, Int32(threadCount)) + } + if options.isErrorLoggingEnabled { + TFL_InterpreterOptionsSetErrorReporter( + cOptions, + { (_, format, arguments) in + guard let cFormat = format, + let message = String(cFormat: cFormat, arguments: arguments) + else { + return + } + print(String(describing: InterpreterError.tensorFlowLiteError(message))) + }, + nil + ) + } + return cOptions + } + defer { TFL_DeleteInterpreterOptions(cInterpreterOptions) } + + guard let cInterpreter = TFL_NewInterpreter(model.cModel, cInterpreterOptions) else { + throw InterpreterError.failedToCreateInterpreter + } + self.cInterpreter = cInterpreter + } + + deinit { + TFL_DeleteInterpreter(cInterpreter) + } + + /// Invokes the interpreter to perform inference from the loaded graph. + /// + /// - Throws: An error if the model was not ready because tensors were not allocated. + public func invoke() throws { + guard TFL_InterpreterInvoke(cInterpreter) == kTfLiteOk else { + // TODO(b/117510052): Determine which error to throw. + throw InterpreterError.allocateTensorsRequired + } + } + + /// Returns the input tensor at the given index. + /// + /// - Parameters: + /// - index: The index for the input tensor. + /// - Throws: An error if the index is invalid or the tensors have not been allocated. + /// - Returns: The input tensor at the given index. 
+ public func input(at index: Int) throws -> Tensor { + let maxIndex = inputTensorCount - 1 + guard case 0...maxIndex = index else { + throw InterpreterError.invalidTensorIndex(index: index, maxIndex: maxIndex) + } + guard let cTensor = TFL_InterpreterGetInputTensor(cInterpreter, Int32(index)), + let bytes = TFL_TensorData(cTensor), + let nameCString = TFL_TensorName(cTensor) + else { + throw InterpreterError.allocateTensorsRequired + } + guard let dataType = TensorDataType(type: TFL_TensorType(cTensor)) else { + throw InterpreterError.invalidTensorDataType + } + + let name = String(cString: nameCString) + let rank = TFL_TensorNumDims(cTensor) + let dimensions = (0.. Tensor { + let maxIndex = outputTensorCount - 1 + guard case 0...maxIndex = index else { + throw InterpreterError.invalidTensorIndex(index: index, maxIndex: maxIndex) + } + guard let cTensor = TFL_InterpreterGetOutputTensor(cInterpreter, Int32(index)), + let bytes = TFL_TensorData(cTensor), + let nameCString = TFL_TensorName(cTensor) + else { + // TODO(b/117510052): Determine which error to throw. + throw InterpreterError.invokeInterpreterRequired + } + guard let dataType = TensorDataType(type: TFL_TensorType(cTensor)) else { + throw InterpreterError.invalidTensorDataType + } + + let name = String(cString: nameCString) + let rank = TFL_TensorNumDims(cTensor) + let dimensions = (0.. 
Tensor { + let maxIndex = inputTensorCount - 1 + guard case 0...maxIndex = index else { + throw InterpreterError.invalidTensorIndex(index: index, maxIndex: maxIndex) + } + guard let cTensor = TFL_InterpreterGetInputTensor(cInterpreter, Int32(index)) else { + throw InterpreterError.allocateTensorsRequired + } + + let byteCount = TFL_TensorByteSize(cTensor) + guard data.count == byteCount else { + throw InterpreterError.invalidTensorDataCount(provided: data.count, required: byteCount) + } + + let status = data.withUnsafeBytes { TFL_TensorCopyFromBuffer(cTensor, $0, data.count) } + guard status == kTfLiteOk else { throw InterpreterError.failedToCopyDataToInputTensor } + return try input(at: index) + } + + /// Allocates memory for all input tensors based on their `TensorShape`s. + /// + /// - Note: This is a relatively expensive operation and should only be called after creating the + /// interpreter and/or resizing any input tensors. + /// - Throws: An error if memory could not be allocated for the input tensors. + public func allocateTensors() throws { + guard TFL_InterpreterAllocateTensors(cInterpreter) == kTfLiteOk else { + throw InterpreterError.failedToAllocateTensors + } + } +} + +// MARK: - Extensions + +extension String { + /// Returns a new `String` initialized by using the given format C array as a template into which + /// the remaining argument values are substituted according to the user’s default locale. + /// + /// - Note: Returns `nil` if a new `String` could not be constructed from the given values. + /// - Parameters: + /// - cFormat: The format C array as a template for substituting values. + /// - arguments: A C pointer to a `va_list` of arguments to substitute into `cFormat`. + init?(cFormat: UnsafePointer, arguments: CVaListPointer) { + var buffer: UnsafeMutablePointer? 
+ guard vasprintf(&buffer, cFormat, arguments) != 0, let cString = buffer else { return nil } + self.init(validatingUTF8: cString) + } +} diff --git a/tensorflow/lite/experimental/swift/Sources/InterpreterError.swift b/tensorflow/lite/experimental/swift/Sources/InterpreterError.swift new file mode 100644 index 0000000000..5de58b997a --- /dev/null +++ b/tensorflow/lite/experimental/swift/Sources/InterpreterError.swift @@ -0,0 +1,99 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// TensorFlow Lite interpreter errors. +public enum InterpreterError: Error { + case invalidTensorIndex(index: Int, maxIndex: Int) + case invalidTensorDataCount(provided: Int, required: Int) + case invalidTensorDataType + case failedToLoadModel + case failedToCreateInterpreter + case failedToResizeInputTensor(index: Int) + case failedToCopyDataToInputTensor + case failedToAllocateTensors + case allocateTensorsRequired + case invokeInterpreterRequired + case tensorFlowLiteError(String) +} + +// MARK: - Extensions + +extension InterpreterError: LocalizedError { + /// Localized description of the interpreter error. + public var errorDescription: String? { + switch self { + case .invalidTensorIndex(let index, let maxIndex): + return "Invalid tensor index \(index), max index is \(maxIndex)." 
+ case .invalidTensorDataCount(let providedCount, let requiredCount): + return "Provided data count \(providedCount) must match the required count \(requiredCount)." + case .invalidTensorDataType: + return "Tensor data type is unsupported or could not be determined because of a model error." + case .failedToLoadModel: + return "Failed to load the given model." + case .failedToCreateInterpreter: + return "Failed to create the interpreter." + case .failedToResizeInputTensor(let index): + return "Failed to resize input tesnor at index \(index)." + case .failedToCopyDataToInputTensor: + return "Failed to copy data to input tensor." + case .failedToAllocateTensors: + return "Failed to allocate memory for input tensors." + case .allocateTensorsRequired: + return "Must call allocateTensors()." + case .invokeInterpreterRequired: + return "Must call invoke()." + case .tensorFlowLiteError(let message): + return "TensorFlow Lite Error: \(message)" + } + } +} + +extension InterpreterError: CustomStringConvertible { + /// Textual representation of the TensorFlow Lite interpreter error. + public var description: String { + return errorDescription ?? "Unknown error." 
+ } +} + +#if swift(>=4.2) +extension InterpreterError: Equatable {} +#else +extension InterpreterError: Equatable { + public static func == (lhs: InterpreterError, rhs: InterpreterError) -> Bool { + switch (lhs, rhs) { + case (.invalidTensorDataType, .invalidTensorDataType), + (.failedToLoadModel, .failedToLoadModel), + (.failedToCreateInterpreter, .failedToCreateInterpreter), + (.failedToAllocateTensors, .failedToAllocateTensors), + (.allocateTensorsRequired, .allocateTensorsRequired), + (.invokeInterpreterRequired, .invokeInterpreterRequired): + return true + case (.invalidTensorIndex(let lhsIndex, let lhsMaxIndex), + .invalidTensorIndex(let rhsIndex, let rhsMaxIndex)): + return lhsIndex == rhsIndex && lhsMaxIndex == rhsMaxIndex + case (.invalidTensorDataCount(let lhsProvidedCount, let lhsRequiredCount), + .invalidTensorDataCount(let rhsProvidedCount, let rhsRequiredCount)): + return lhsProvidedCount == rhsProvidedCount && lhsRequiredCount == rhsRequiredCount + case (.failedToResizeInputTensor(let lhsIndex), .failedToResizeInputTensor(let rhsIndex)): + return lhsIndex == rhsIndex + case (.tensorFlowLiteError(let lhsMessage), .tensorFlowLiteError(let rhsMessage)): + return lhsMessage == rhsMessage + default: + return false + } + } +} +#endif // swift(>=4.2) diff --git a/tensorflow/lite/experimental/swift/Sources/InterpreterOptions.swift b/tensorflow/lite/experimental/swift/Sources/InterpreterOptions.swift new file mode 100644 index 0000000000..2365fd7ade --- /dev/null +++ b/tensorflow/lite/experimental/swift/Sources/InterpreterOptions.swift @@ -0,0 +1,29 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Custom configuration options for a TensorFlow Lite interpreter. +public struct InterpreterOptions: Equatable { + + /// Maximum number of CPU threads that the interpreter should run on. Default is `nil` which + /// indicates that the `Interpreter` will decide the number of threads to use. + public var threadCount: Int? = nil + + /// Whether error logging to the console is enabled. The default is `false`. + public var isErrorLoggingEnabled = false + + /// Creates a new instance of interpreter options. + public init() {} +} diff --git a/tensorflow/lite/experimental/swift/Sources/Model.swift b/tensorflow/lite/experimental/swift/Sources/Model.swift new file mode 100644 index 0000000000..e8c49ff1ae --- /dev/null +++ b/tensorflow/lite/experimental/swift/Sources/Model.swift @@ -0,0 +1,40 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation +import TensorFlowLiteCAPI + +/// A TensorFlow Lite model used by the `Interpreter` to perform inference. 
+final class Model { + + /// The `TFL_Model` C pointer type represented as an `UnsafePointer`. + typealias CModel = OpaquePointer + + /// The underlying `TFL_Model` C pointer. + let cModel: CModel? + + /// Creates a new model instance. + /// + /// - Precondition: Initialization can fail if the given `filePath` is invalid. + /// - Parameters: + /// - filePath: Local file path to a TensorFlow Lite model. + init?(filePath: String) { + guard !filePath.isEmpty, let cModel = TFL_NewModelFromFile(filePath) else { return nil } + self.cModel = cModel + } + + deinit { + TFL_DeleteModel(cModel) + } +} diff --git a/tensorflow/lite/experimental/swift/Sources/QuantizationParameters.swift b/tensorflow/lite/experimental/swift/Sources/QuantizationParameters.swift new file mode 100644 index 0000000000..f367875644 --- /dev/null +++ b/tensorflow/lite/experimental/swift/Sources/QuantizationParameters.swift @@ -0,0 +1,38 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Parameters that determine the mapping of quantized values to real values. Quantized values can +/// be mapped to float values using the following conversion: +/// `realValue = scale * (quantizedValue - zeroPoint)`. +public struct QuantizationParameters { + + /// Difference between real values corresponding to consecutive quantized values differing by 1. 
+ /// For example, the range of quantized values for `UInt8` data type is [0, 255]. + public let scale: Float + + /// Quantized value that corresponds to the real 0 value. + public let zeroPoint: Int + + /// Creates a new quantization parameters instance. + /// + /// - Parameters: + /// - scale: Scale value for asymmetric quantization. + /// - zeroPoint: Zero point for asymmetric quantization. + init(scale: Float, zeroPoint: Int) { + self.scale = scale + self.zeroPoint = zeroPoint + } +} diff --git a/tensorflow/lite/experimental/swift/Sources/Tensor.swift b/tensorflow/lite/experimental/swift/Sources/Tensor.swift new file mode 100644 index 0000000000..b738d87549 --- /dev/null +++ b/tensorflow/lite/experimental/swift/Sources/Tensor.swift @@ -0,0 +1,138 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation +import TensorFlowLiteCAPI + +/// An input or output tensor in a TensorFlow Lite graph. +public struct Tensor { + + /// Name of the tensor. + public let name: String + + /// Data type of the tensor. + public let dataType: TensorDataType + + /// Shape of the tensor. + public let shape: TensorShape + + /// Data in the input or output tensor. + public let data: Data + + /// Quantization parameters for the tensor if using a quantized model. + public let quantizationParameters: QuantizationParameters? + + /// Creates a new input or output tensor instance. 
+ /// + /// - Parameters: + /// - name: Name of the tensor. + /// - dataType: Data type of the tensor. + /// - data: Data in the input tensor. + /// - quantizationParameters Quantization parameters for the tensor if using a quantized model. + /// The default is `nil`. + init( + name: String, + dataType: TensorDataType, + shape: TensorShape, + data: Data, + quantizationParameters: QuantizationParameters? = nil + ) { + self.name = name + self.dataType = dataType + self.shape = shape + self.data = data + self.quantizationParameters = quantizationParameters + } +} + +/// Supported TensorFlow Lite tensor data types. +public enum TensorDataType: Equatable { + /// 32-bit single precision floating point tensor data type. + case float32 + /// 8-bit unsigned integer tensor data type. + case uInt8 + /// 16-bit signed integer tensor data type. + case int16 + /// 32-bit signed integer tensor data type. + case int32 + /// 64-bit signed integer tensor data type. + case int64 + /// Boolean tensor data type. + case bool + + /// Creates a new tensor data type from the given `TFL_Type` or `nil` if the data type is + /// unsupported or could not be determined because there was an error. + /// + /// - Parameter type: A data type supported by a tensor. + init?(type: TFL_Type) { + switch type { + case kTfLiteFloat32: + self = .float32 + case kTfLiteUInt8: + self = .uInt8 + case kTfLiteInt16: + self = .int16 + case kTfLiteInt32: + self = .int32 + case kTfLiteInt64: + self = .int64 + case kTfLiteBool: + self = .bool + case kTfLiteNoType: + fallthrough + default: + return nil + } + } +} + +/// The shape of a TensorFlow Lite tensor. +public struct TensorShape { + + /// The number of dimensions of the tensor. + public let rank: Int + + /// Array of dimensions for the tensor. + public let dimensions: [Int] + + /// Array of `Int32` dimensions for the tensor. 
+ var int32Dimensions: [Int32] { return dimensions.map(Int32.init) } + + /// Creates a new tensor shape instance with the given array of dimensions. + /// + /// - Parameters: + /// - dimensions: Dimensions for the tensor. + public init(_ dimensions: [Int]) { + self.rank = dimensions.count + self.dimensions = dimensions + } + + /// Creates a new tensor shape instance with the given elements representing the dimensions. + /// + /// - Parameters: + /// - elements: Dimensions for the tensor. + public init(_ elements: Int...) { + self.init(elements) + } +} + +extension TensorShape: ExpressibleByArrayLiteral { + /// Creates a new tensor shape instance with the given array literal representing the dimensions. + /// + /// - Parameters: + /// - arrayLiteral: Dimensions for the tensor. + public init(arrayLiteral: Int...) { + self.init(arrayLiteral) + } +} diff --git a/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/Configs/TensorFlowLite.tulsigen b/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/Configs/TensorFlowLite.tulsigen new file mode 100644 index 0000000000..16bc6cbfe8 --- /dev/null +++ b/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/Configs/TensorFlowLite.tulsigen @@ -0,0 +1,57 @@ +{ + "sourceFilters" : [ + "tensorflow/lite/experimental/c", + "tensorflow/lite/experimental/swift", + "tensorflow/lite/experimental/swift/Sources", + "tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp", + "tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj", + "tensorflow/lite/experimental/swift/Tests", + ], + "buildTargets" : [ + "//tensorflow/lite/experimental/swift:TensorFlowLite", + "//tensorflow/lite/experimental/swift:TensorFlowLiteApp", + "//tensorflow/lite/experimental/swift:TensorFlowLiteTests", + ], + "projectName" : "TensorFlowLite", + "optionSet" : { + "LaunchActionPreActionScript" : { + "p" : "$(inherited)" + }, + "BazelBuildStartupOptionsRelease" : { + "p" : 
"$(inherited)" + }, + "BazelBuildOptionsRelease" : { + "p" : "$(inherited)" + }, + "BazelBuildOptionsDebug" : { + "p" : "$(inherited)" + }, + "EnvironmentVariables" : { + "p" : "$(inherited)" + }, + "BuildActionPreActionScript" : { + "p" : "$(inherited)" + }, + "CommandlineArguments" : { + "p" : "$(inherited)" + }, + "TestActionPreActionScript" : { + "p" : "$(inherited)" + }, + "BazelBuildStartupOptionsDebug" : { + "p" : "$(inherited)" + }, + "BuildActionPostActionScript" : { + "p" : "$(inherited)" + }, + "TestActionPostActionScript" : { + "p" : "$(inherited)" + }, + "LaunchActionPostActionScript" : { + "p" : "$(inherited)" + } + }, + "additionalFilePaths" : [ + "tensorflow/lite/experimental/swift/BUILD" + ] +} diff --git a/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/project.tulsiconf b/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/project.tulsiconf new file mode 100644 index 0000000000..82ac8aa381 --- /dev/null +++ b/tensorflow/lite/experimental/swift/TensorFlowLite.tulsiproj/project.tulsiconf @@ -0,0 +1,14 @@ +{ + "configDefaults" : { + "optionSet" : { + "ProjectPrioritizesSwift" : { + "p" : "YES" + } + } + }, + "projectName" : "TensorFlowLite", + "packages" : [ + "tensorflow/lite/experimental/swift" + ], + "workspaceRoot" : "../../../../.." +} diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp.xcodeproj/project.pbxproj b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp.xcodeproj/project.pbxproj new file mode 100644 index 0000000000..fbbf9a1de2 --- /dev/null +++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp.xcodeproj/project.pbxproj @@ -0,0 +1,345 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 50; + objects = { + +/* Begin PBXBuildFile section */ + 4A7304B421500B8400C90B21 /* Data+TensorFlowLite.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4A7304B321500B8300C90B21 /* Data+TensorFlowLite.swift */; }; + 4AA72B732146ED64006C3AEF /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA72B722146ED64006C3AEF /* AppDelegate.swift */; }; + 4AA72B752146ED64006C3AEF /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA72B742146ED64006C3AEF /* ViewController.swift */; }; + 4AA72B782146ED64006C3AEF /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 4AA72B762146ED64006C3AEF /* Main.storyboard */; }; + 4AA72B7A2146ED66006C3AEF /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 4AA72B792146ED66006C3AEF /* Assets.xcassets */; }; + 4AA72B7D2146ED66006C3AEF /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 4AA72B7B2146ED66006C3AEF /* LaunchScreen.storyboard */; }; + 4ADDE0CE2176600E00FF07A2 /* Array+TensorFlowLite.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4ADDE0CD2176600900FF07A2 /* Array+TensorFlowLite.swift */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 4A7304B321500B8300C90B21 /* Data+TensorFlowLite.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "Data+TensorFlowLite.swift"; sourceTree = ""; }; + 4AA72B6F2146ED64006C3AEF /* TensorFlowLiteApp.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = TensorFlowLiteApp.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 4AA72B722146ED64006C3AEF /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; + 4AA72B742146ED64006C3AEF /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; 
sourceTree = ""; }; + 4AA72B772146ED64006C3AEF /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; + 4AA72B792146ED66006C3AEF /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 4AA72B7C2146ED66006C3AEF /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; + 4AA72B7E2146ED66006C3AEF /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 4ADDE0CD2176600900FF07A2 /* Array+TensorFlowLite.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "Array+TensorFlowLite.swift"; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 4AA72B6C2146ED64006C3AEF /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 4AA72B662146ED64006C3AEF = { + isa = PBXGroup; + children = ( + 4AA72B712146ED64006C3AEF /* TensorFlowLiteApp */, + 4AA72B702146ED64006C3AEF /* Products */, + ); + sourceTree = ""; + }; + 4AA72B702146ED64006C3AEF /* Products */ = { + isa = PBXGroup; + children = ( + 4AA72B6F2146ED64006C3AEF /* TensorFlowLiteApp.app */, + ); + name = Products; + sourceTree = ""; + }; + 4AA72B712146ED64006C3AEF /* TensorFlowLiteApp */ = { + isa = PBXGroup; + children = ( + 4AA72B722146ED64006C3AEF /* AppDelegate.swift */, + 4ADDE0CD2176600900FF07A2 /* Array+TensorFlowLite.swift */, + 4A7304B321500B8300C90B21 /* Data+TensorFlowLite.swift */, + 4AA72B742146ED64006C3AEF /* ViewController.swift */, + 4AA72B762146ED64006C3AEF /* Main.storyboard */, + 4AA72B792146ED66006C3AEF /* Assets.xcassets */, + 
4AA72B7B2146ED66006C3AEF /* LaunchScreen.storyboard */, + 4AA72B7E2146ED66006C3AEF /* Info.plist */, + ); + path = TensorFlowLiteApp; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 4AA72B6E2146ED64006C3AEF /* TensorFlowLiteApp */ = { + isa = PBXNativeTarget; + buildConfigurationList = 4AA72B812146ED66006C3AEF /* Build configuration list for PBXNativeTarget "TensorFlowLiteApp" */; + buildPhases = ( + 4AA72B6B2146ED64006C3AEF /* Sources */, + 4AA72B6C2146ED64006C3AEF /* Frameworks */, + 4AA72B6D2146ED64006C3AEF /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = TensorFlowLiteApp; + productName = TensorFlowLiteApp; + productReference = 4AA72B6F2146ED64006C3AEF /* TensorFlowLiteApp.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 4AA72B672146ED64006C3AEF /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 0940; + LastUpgradeCheck = 0940; + ORGANIZATIONNAME = Google; + TargetAttributes = { + 4AA72B6E2146ED64006C3AEF = { + CreatedOnToolsVersion = 9.4.1; + }; + }; + }; + buildConfigurationList = 4AA72B6A2146ED64006C3AEF /* Build configuration list for PBXProject "TensorFlowLiteApp" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 4AA72B662146ED64006C3AEF; + productRefGroup = 4AA72B702146ED64006C3AEF /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 4AA72B6E2146ED64006C3AEF /* TensorFlowLiteApp */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 4AA72B6D2146ED64006C3AEF /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 4AA72B7D2146ED66006C3AEF /* LaunchScreen.storyboard in Resources */, + 4AA72B7A2146ED66006C3AEF /* Assets.xcassets in Resources */, + 
4AA72B782146ED64006C3AEF /* Main.storyboard in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 4AA72B6B2146ED64006C3AEF /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 4AA72B732146ED64006C3AEF /* AppDelegate.swift in Sources */, + 4ADDE0CE2176600E00FF07A2 /* Array+TensorFlowLite.swift in Sources */, + 4A7304B421500B8400C90B21 /* Data+TensorFlowLite.swift in Sources */, + 4AA72B752146ED64006C3AEF /* ViewController.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXVariantGroup section */ + 4AA72B762146ED64006C3AEF /* Main.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 4AA72B772146ED64006C3AEF /* Base */, + ); + name = Main.storyboard; + sourceTree = ""; + }; + 4AA72B7B2146ED66006C3AEF /* LaunchScreen.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 4AA72B7C2146ED66006C3AEF /* Base */, + ); + name = LaunchScreen.storyboard; + sourceTree = ""; + }; +/* End PBXVariantGroup section */ + +/* Begin XCBuildConfiguration section */ + 4AA72B7F2146ED66006C3AEF /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + 
CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 11.4; + MTL_ENABLE_DEBUG_INFO = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + 4AA72B802146ED66006C3AEF /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = 
YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 11.4; + MTL_ENABLE_DEBUG_INFO = NO; + SDKROOT = iphoneos; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 4AA72B822146ED66006C3AEF /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = TensorFlowLiteApp/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = com.tensorflow.lite.swift.TensorFlowLite; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 4.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 4AA72B832146ED66006C3AEF /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_STYLE = Automatic; + INFOPLIST_FILE = TensorFlowLiteApp/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + 
"$(inherited)", + "@executable_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = com.tensorflow.lite.swift.TensorFlowLite; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 4.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 4AA72B6A2146ED64006C3AEF /* Build configuration list for PBXProject "TensorFlowLiteApp" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 4AA72B7F2146ED66006C3AEF /* Debug */, + 4AA72B802146ED66006C3AEF /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 4AA72B812146ED66006C3AEF /* Build configuration list for PBXNativeTarget "TensorFlowLiteApp" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 4AA72B822146ED66006C3AEF /* Debug */, + 4AA72B832146ED66006C3AEF /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 4AA72B672146ED64006C3AEF /* Project object */; +} diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/AppDelegate.swift b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/AppDelegate.swift new file mode 100644 index 0000000000..ffa90a06ad --- /dev/null +++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/AppDelegate.swift @@ -0,0 +1,24 @@ +import UIKit + +@UIApplicationMain + +final class AppDelegate: UIResponder, UIApplicationDelegate { + + /// The main window of the app. + var window: UIWindow? + + func application( + _ application: UIApplication, + didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]? 
= nil + ) -> Bool { + return true + } +} + +// MARK: - Extensions + +#if !swift(>=4.2) +extension UIApplication { + typealias LaunchOptionsKey = UIApplicationLaunchOptionsKey +} +#endif // !swift(>=4.2) diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Array+TensorFlowLite.swift b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Array+TensorFlowLite.swift new file mode 100644 index 0000000000..56df1ce659 --- /dev/null +++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Array+TensorFlowLite.swift @@ -0,0 +1,22 @@ +import Foundation + +extension Array { + /// Creates a new array from the bytes of the given unsafe data. + /// + /// - Warning: The array's `Element` type must be trivial in that it can be copied bit for bit + /// with no indirection or reference-counting operations; otherwise, copying the raw bytes in + /// the `unsafeData`'s buffer to a new array returns an unsafe copy. + /// - Note: Returns `nil` if `unsafeData.count` is not a multiple of + /// `MemoryLayout.stride`. + /// - Parameter unsafeData: The data containing the bytes to turn into an array. 
+ init?(unsafeData: Data) { + guard unsafeData.count % MemoryLayout.stride == 0 else { return nil } + let elements = unsafeData.withUnsafeBytes { + UnsafeBufferPointer( + start: $0, + count: unsafeData.count / MemoryLayout.stride + ) + } + self.init(elements) + } +} diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/AppIcon.appiconset/Contents.json b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 0000000000..d8db8d65fd --- /dev/null +++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,98 @@ +{ + "images" : [ + { + "idiom" : "iphone", + "size" : "20x20", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "20x20", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "29x29", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "29x29", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "40x40", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "40x40", + "scale" : "3x" + }, + { + "idiom" : "iphone", + "size" : "60x60", + "scale" : "2x" + }, + { + "idiom" : "iphone", + "size" : "60x60", + "scale" : "3x" + }, + { + "idiom" : "ipad", + "size" : "20x20", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "20x20", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "29x29", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "29x29", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "40x40", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "40x40", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "76x76", + "scale" : "1x" + }, + { + "idiom" : "ipad", + "size" : "76x76", + "scale" : "2x" + }, + { + "idiom" : "ipad", + "size" : "83.5x83.5", + "scale" : "2x" + }, + { + "idiom" : "ios-marketing", + "size" : "1024x1024", + "scale" : "1x" + } + ], 
+ "info" : { + "version" : 1, + "author" : "xcode" + } +} \ No newline at end of file diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/Contents.json b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/Contents.json new file mode 100644 index 0000000000..da4a164c91 --- /dev/null +++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "version" : 1, + "author" : "xcode" + } +} \ No newline at end of file diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/LaunchScreen.storyboard b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/LaunchScreen.storyboard new file mode 100644 index 0000000000..a07a1321be --- /dev/null +++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/LaunchScreen.storyboard @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/Main.storyboard b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/Main.storyboard new file mode 100644 index 0000000000..10cae6e855 --- /dev/null +++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Base.lproj/Main.storyboard @@ -0,0 +1,95 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Data+TensorFlowLite.swift b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Data+TensorFlowLite.swift new file mode 100644 index 
0000000000..bc8a70c848 --- /dev/null +++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Data+TensorFlowLite.swift @@ -0,0 +1,13 @@ +import Foundation + +extension Data { + /// Creates a new buffer by copying the buffer pointer of the given array. + /// + /// - Warning: The given array's element type `T` must be trivial in that it can be copied bit + /// for bit with no indirection or reference-counting operations; otherwise, reinterpreting + /// data from the resulting buffer has undefined behavior. + /// - Parameter array: An array with elements of type `T`. + init(copyingBufferOf array: [T]) { + self = array.withUnsafeBufferPointer(Data.init) + } +} diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Info.plist b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Info.plist new file mode 100644 index 0000000000..3ca3875f04 --- /dev/null +++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/Info.plist @@ -0,0 +1,46 @@ + + + + + CFBundleDevelopmentRegion + en + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0 + CFBundleVersion + 0.0.1 + LSRequiresIPhoneOS + + NSCameraUsageDescription + NSCameraUsageDescription + NSPhotoLibraryUsageDescription + Select a photo to detect objects in. 
+ UILaunchStoryboardName + LaunchScreen + UIMainStoryboardFile + Main + UIRequiredDeviceCapabilities + + armv7 + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + UIInterfaceOrientationPortraitUpsideDown + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + UIInterfaceOrientationPortraitUpsideDown + + + diff --git a/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/ViewController.swift b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/ViewController.swift new file mode 100644 index 0000000000..73c74fd19c --- /dev/null +++ b/tensorflow/lite/experimental/swift/TestApps/TensorFlowLiteApp/TensorFlowLiteApp/ViewController.swift @@ -0,0 +1,299 @@ +import TensorFlowLite +import UIKit + +class ViewController: UIViewController { + + // MARK: - Properties + + /// TensorFlowLite interpreter object for performing inference from a given model. + private var interpreter: Interpreter? + + /// Serial dispatch queue for managing `Interpreter` calls. + private let interpreterQueue = DispatchQueue( + label: Constant.dispatchQueueLabel, + qos: .userInitiated + ) + + /// The currently selected model. + private var currentModel: Model { + guard let currentModel = Model(rawValue: modelControl.selectedSegmentIndex) else { + preconditionFailure("Invalid model for selected segment index.") + } + return currentModel + } + + /// A description of the current model. + private var modelDescription: String { + guard let interpreter = interpreter else { return "" } + let inputCount = interpreter.inputTensorCount + let outputCount = interpreter.outputTensorCount + let inputTensors = (0.. String = { + guard let results = [Float32](unsafeData: outputTensor.data) else { return "No results." 
} + return resultsText + results.description + } + self.updateResultsText(results()) + } catch let error { + self.updateResultsText( + "Failed to invoke the interpreter with error: \(error.localizedDescription)" + ) + return + } + } + } + + private func invokeAddQuantized() { + interpreterQueue.async { + guard let interpreter = self.interpreter else { + self.updateResultsText(Constant.nilInterpreterErrorMessage) + return + } + do { + try interpreter.resizeInput(at: 0, to: [2]) + try interpreter.allocateTensors() + let input: [UInt8] = [1, 3] + let resultsText = self.modelDescription + "\n\n" + + "Performing 2 add operations on quantized input \(input.description) equals: " + self.updateResultsText(resultsText) + let data = Data(input) + try interpreter.copy(data, toInputAt: 0) + try interpreter.invoke() + let outputTensor = try interpreter.output(at: 0) + let results: () -> String = { + guard let quantizationParameters = outputTensor.quantizationParameters else { + return "No results." + } + let quantizedResults = [UInt8](outputTensor.data) + let dequantizedResults = quantizedResults.map { + quantizationParameters.scale * Float(Int($0) - quantizationParameters.zeroPoint) + } + return resultsText + quantizedResults.description + + ", dequantized results: " + dequantizedResults.description + } + self.updateResultsText(results()) + } catch let error { + self.updateResultsText( + "Failed to invoke the interpreter with error: \(error.localizedDescription)" + ) + return + } + } + } + + private func invokeMultiAdd() { + interpreterQueue.async { + guard let interpreter = self.interpreter else { + self.updateResultsText(Constant.nilInterpreterErrorMessage) + return + } + do { + let shape = TensorShape(2) + try (0.. 
[Float32] in + let input = [Float32(index + 1), Float32(index + 2)] + let data = Data(copyingBufferOf: input) + try interpreter.copy(data, toInputAt: index) + return input + } + let resultsText = self.modelDescription + "\n\n" + + "Performing 3 add operations on inputs \(inputs.description) equals: " + self.updateResultsText(resultsText) + try interpreter.invoke() + let results = try (0.. [Float32] in + let tensor = try interpreter.output(at: index) + return [Float32](unsafeData: tensor.data) ?? [] + } + self.updateResultsText(resultsText + results.description) + } catch let error { + self.updateResultsText( + "Failed to invoke the interpreter with error: \(error.localizedDescription)" + ) + return + } + } + } + + private func updateResultsText(_ text: String? = nil) { + safeDispatchOnMain { self.resultsTextView.text = text } + } +} + +// MARK: - Constants + +private enum Constant { + static let dispatchQueueLabel = "TensorFlowLiteInterpreterQueue" + static let nilInterpreterErrorMessage = + "Failed to invoke the interpreter because the interpreter was nil." +} + +/// Models that can be loaded by the TensorFlow Lite `Interpreter`. +private enum Model: Int, CustomStringConvertible { + /// A float model that performs two add operations on one input tensor and returns the result in + /// one output tensor. + case add = 0 + /// A quantized model that performs two add operations on one input tensor and returns the result + /// in one output tensor. + case addQuantized = 1 + /// A float model that performs three add operations on four input tensors and returns the results + /// in 2 output tensors. 
+ case multiAdd = 2 + + var fileInfo: (name: String, extension: String) { + switch self { + case .add: + return Add.fileInfo + case .addQuantized: + return AddQuantized.fileInfo + case .multiAdd: + return MultiAdd.fileInfo + } + } + + // MARK: - CustomStringConvertible + + var description: String { + switch self { + case .add: + return Add.name + case .addQuantized: + return AddQuantized.name + case .multiAdd: + return MultiAdd.name + } + } +} + +/// Values for the `Add` model. +private enum Add { + static let name = "Add" + static let fileInfo = (name: "add", extension: "bin") +} + +/// Values for the `AddQuantized` model. +private enum AddQuantized { + static let name = "AddQuantized" + static let fileInfo = (name: "add_quantized", extension: "bin") +} + +/// Values for the `MultiAdd` model. +private enum MultiAdd { + static let name = "MultiAdd" + static let fileInfo = (name: "multi_add", extension: "bin") +} + +// MARK: - Fileprivate + +/// Safely dispatches the given block on the main queue. If the current thread is `main`, the block +/// is executed synchronously; otherwise, the block is executed asynchronously on the main thread. +fileprivate func safeDispatchOnMain(_ block: @escaping () -> Void) { + if Thread.isMainThread { block(); return } + DispatchQueue.main.async { block() } +} diff --git a/tensorflow/lite/experimental/swift/Tests/InterpreterOptionsTests.swift b/tensorflow/lite/experimental/swift/Tests/InterpreterOptionsTests.swift new file mode 100644 index 0000000000..54b4f59b28 --- /dev/null +++ b/tensorflow/lite/experimental/swift/Tests/InterpreterOptionsTests.swift @@ -0,0 +1,54 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +@testable import TensorFlowLite +import XCTest + +class InterpreterOptionsTests: XCTestCase { + + func testInterpreterOptions_InitWithDefaultValues() { + let options = InterpreterOptions() + XCTAssertNil(options.threadCount) + XCTAssertFalse(options.isErrorLoggingEnabled) + } + + func testInterpreterOptions_InitWithCustomValues() { + var options = InterpreterOptions() + options.threadCount = 2 + XCTAssertEqual(options.threadCount, 2) + options.isErrorLoggingEnabled = true + XCTAssertTrue(options.isErrorLoggingEnabled) + } + + func testInterpreterOptions_Equatable() { + var options1 = InterpreterOptions() + var options2 = InterpreterOptions() + XCTAssertEqual(options1, options2) + + options1.threadCount = 2 + options2.threadCount = 2 + XCTAssertEqual(options1, options2) + + options2.threadCount = 3 + XCTAssertNotEqual(options1, options2) + options2.threadCount = 2 + + options1.isErrorLoggingEnabled = true + options2.isErrorLoggingEnabled = true + XCTAssertEqual(options1, options2) + + options2.isErrorLoggingEnabled = false + XCTAssertNotEqual(options1, options2) + } +} diff --git a/tensorflow/lite/experimental/swift/Tests/InterpreterTests.swift b/tensorflow/lite/experimental/swift/Tests/InterpreterTests.swift new file mode 100644 index 0000000000..e98da5f951 --- /dev/null +++ b/tensorflow/lite/experimental/swift/Tests/InterpreterTests.swift @@ -0,0 +1,315 @@ +// Copyright 2018 Google Inc. All rights reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +@testable import TensorFlowLite +import XCTest + +class InterpreterTests: XCTestCase { + + var interpreter: Interpreter! + + override func setUp() { + super.setUp() + + interpreter = try! Interpreter(modelPath: AddModel.path) + } + + override func tearDown() { + interpreter = nil + + super.tearDown() + } + + func testInterpreter_InitWithModelPath() { + XCTAssertNoThrow(try Interpreter(modelPath: AddModel.path)) + } + + func testInterpreter_Init_ThrowsFailedToLoadModel() { + XCTAssertThrowsError(try Interpreter(modelPath: "/invalid/path")) { error in + self.assertEqualErrors(actual: error, expected: .failedToLoadModel) + } + } + + func testInterpreter_InitWithModelPathAndOptions() { + var options = InterpreterOptions() + options.threadCount = 2 + XCTAssertNoThrow(try Interpreter(modelPath: AddModel.path, options: options)) + } + + func testInterpreter_InputTensorCount() { + XCTAssertEqual(interpreter.inputTensorCount, AddModel.inputTensorCount) + } + + func testInterpreter_OutputTensorCount() { + XCTAssertEqual(interpreter.outputTensorCount, AddModel.outputTensorCount) + } + + func testInterpreter_Invoke() throws { + try interpreter.allocateTensors() + XCTAssertNoThrow(try interpreter.invoke()) + } + + func testInterpreter_Invoke_ThrowsAllocateTensorsRequired_ModelNotReady() { + XCTAssertThrowsError(try interpreter.invoke()) { error in + self.assertEqualErrors(actual: error, expected: 
.allocateTensorsRequired) + } + } + + func testInterpreter_InputTensorAtIndex() throws { + try setUpAddModelInputTensor() + let inputTensor = try interpreter.input(at: AddModel.validIndex) + XCTAssertEqual(inputTensor, AddModel.inputTensor) + } + + func testInterpreter_InputTensorAtIndex_QuantizedModel() throws { + interpreter = try Interpreter(modelPath: AddQuantizedModel.path) + try setUpAddQuantizedModelInputTensor() + let inputTensor = try interpreter.input(at: AddQuantizedModel.inputOutputIndex) + XCTAssertEqual(inputTensor, AddQuantizedModel.inputTensor) + } + + func testInterpreter_InputTensorAtIndex_ThrowsInvalidIndex() throws { + try interpreter.allocateTensors() + XCTAssertThrowsError(try interpreter.input(at: AddModel.invalidIndex)) { error in + let maxIndex = AddModel.inputTensorCount - 1 + self.assertEqualErrors( + actual: error, + expected: .invalidTensorIndex(index: AddModel.invalidIndex, maxIndex: maxIndex) + ) + } + } + + func testInterpreter_InputTensorAtIndex_ThrowsAllocateTensorsRequired() { + XCTAssertThrowsError(try interpreter.input(at: AddModel.validIndex)) { error in + self.assertEqualErrors(actual: error, expected: .allocateTensorsRequired) + } + } + + func testInterpreter_OutputTensorAtIndex() throws { + try setUpAddModelInputTensor() + try interpreter.invoke() + let outputTensor = try interpreter.output(at: AddModel.validIndex) + XCTAssertEqual(outputTensor, AddModel.outputTensor) + let expectedResults = [Float32](unsafeData: outputTensor.data) + XCTAssertEqual(expectedResults, AddModel.results) + } + + func testInterpreter_OutputTensorAtIndex_QuantizedModel() throws { + interpreter = try Interpreter(modelPath: AddQuantizedModel.path) + try setUpAddQuantizedModelInputTensor() + try interpreter.invoke() + let outputTensor = try interpreter.output(at: AddQuantizedModel.inputOutputIndex) + XCTAssertEqual(outputTensor, AddQuantizedModel.outputTensor) + let expectedResults = [UInt8](outputTensor.data) + XCTAssertEqual(expectedResults, 
AddQuantizedModel.results) + } + + func testInterpreter_OutputTensorAtIndex_ThrowsInvalidIndex() throws { + try interpreter.allocateTensors() + try interpreter.invoke() + XCTAssertThrowsError(try interpreter.output(at: AddModel.invalidIndex)) { error in + let maxIndex = AddModel.outputTensorCount - 1 + self.assertEqualErrors( + actual: error, + expected: .invalidTensorIndex(index: AddModel.invalidIndex, maxIndex: maxIndex) + ) + } + } + + func testInterpreter_OutputTensorAtIndex_ThrowsInvokeInterpreterRequired() { + XCTAssertThrowsError(try interpreter.output(at: AddModel.validIndex)) { error in + self.assertEqualErrors(actual: error, expected: .invokeInterpreterRequired) + } + } + + func testInterpreter_ResizeInputTensorAtIndexToShape() { + XCTAssertNoThrow(try interpreter.resizeInput(at: AddModel.validIndex, to: [2, 2, 3])) + XCTAssertNoThrow(try interpreter.allocateTensors()) + } + + func testInterpreter_ResizeInputTensorAtIndexToShape_ThrowsInvalidIndex() { + XCTAssertThrowsError(try interpreter.resizeInput( + at: AddModel.invalidIndex, + to: [2, 2, 3] + )) { error in + let maxIndex = AddModel.inputTensorCount - 1 + self.assertEqualErrors( + actual: error, + expected: .invalidTensorIndex(index: AddModel.invalidIndex, maxIndex: maxIndex) + ) + } + } + + func testInterpreter_CopyDataToInputTensorAtIndex() throws { + try interpreter.resizeInput(at: AddModel.validIndex, to: AddModel.shape) + try interpreter.allocateTensors() + let inputTensor = try interpreter.copy(AddModel.inputData, toInputAt: AddModel.validIndex) + XCTAssertEqual(inputTensor.data, AddModel.inputData) + } + + func testInterpreter_CopyDataToInputTensorAtIndex_ThrowsInvalidIndex() { + XCTAssertThrowsError(try interpreter.copy( + AddModel.inputData, + toInputAt: AddModel.invalidIndex + )) { error in + let maxIndex = AddModel.inputTensorCount - 1 + self.assertEqualErrors( + actual: error, + expected: .invalidTensorIndex(index: AddModel.invalidIndex, maxIndex: maxIndex) + ) + } + } + + func 
testInterpreter_CopyDataToInputTensorAtIndex_ThrowsInvalidDataCount() throws { + try interpreter.resizeInput(at: AddModel.validIndex, to: AddModel.shape) + try interpreter.allocateTensors() + let invalidData = Data(count: AddModel.dataCount - 1) + XCTAssertThrowsError(try interpreter.copy( + invalidData, + toInputAt: AddModel.validIndex + )) { error in + self.assertEqualErrors( + actual: error, + expected: .invalidTensorDataCount(provided: invalidData.count, required: AddModel.dataCount) + ) + } + } + + func testInterpreter_AllocateTensors() { + XCTAssertNoThrow(try interpreter.allocateTensors()) + } + + // MARK: - Private + + private func setUpAddModelInputTensor() throws { + precondition(interpreter != nil) + try interpreter.resizeInput(at: AddModel.validIndex, to: AddModel.shape) + try interpreter.allocateTensors() + try interpreter.copy(AddModel.inputData, toInputAt: AddModel.validIndex) + } + + private func setUpAddQuantizedModelInputTensor() throws { + precondition(interpreter != nil) + try interpreter.resizeInput(at: AddQuantizedModel.inputOutputIndex, to: AddQuantizedModel.shape) + try interpreter.allocateTensors() + try interpreter.copy(AddQuantizedModel.inputData, toInputAt: AddQuantizedModel.inputOutputIndex) + } + + private func assertEqualErrors(actual: Error, expected: InterpreterError) { + guard let actual = actual as? InterpreterError else { + XCTFail("Actual error should be of type InterpreterError.") + return + } + XCTAssertEqual(actual, expected) + } +} + +// MARK: - Constants + +/// Values for the `add.bin` model. 
+private enum AddModel { + static let info = (name: "add", extension: "bin") + static let inputTensorCount = 1 + static let outputTensorCount = 1 + static let invalidIndex = 1 + static let validIndex = 0 + static let shape: TensorShape = [2] + static let dataCount = inputData.count + static let inputData = Data(copyingBufferOf: [Float32(1.0), Float32(3.0)]) + static let outputData = Data(copyingBufferOf: [Float32(3.0), Float32(9.0)]) + static let results = [Float32(3.0), Float32(9.0)] + + static let inputTensor = Tensor( + name: "input", + dataType: .float32, + shape: shape, + data: inputData + ) + static let outputTensor = Tensor( + name: "output", + dataType: .float32, + shape: shape, + data: outputData + ) + + static var path: String = { + let bundle = Bundle(for: InterpreterTests.self) + guard let path = bundle.path(forResource: info.name, ofType: info.extension) else { return "" } + return path + }() +} + +/// Values for the `add_quantized.bin` model. +private enum AddQuantizedModel { + static let info = (name: "add_quantized", extension: "bin") + static let inputOutputIndex = 0 + static let shape: TensorShape = [2] + static let inputData = Data([1, 3]) + static let outputData = Data([3, 9]) + static let quantizationParameters = QuantizationParameters(scale: 0.003922, zeroPoint: 0) + static let results: [UInt8] = [3, 9] + + static let inputTensor = Tensor( + name: "input", + dataType: .uInt8, + shape: shape, + data: inputData, + quantizationParameters: quantizationParameters + ) + static let outputTensor = Tensor( + name: "output", + dataType: .uInt8, + shape: shape, + data: outputData, + quantizationParameters: quantizationParameters + ) + + static var path: String = { + let bundle = Bundle(for: InterpreterTests.self) + guard let path = bundle.path(forResource: info.name, ofType: info.extension) else { return "" } + return path + }() +} + +// MARK: - Extensions + +extension Array { + /// Creates a new array from the bytes of the given unsafe data. 
+ /// + /// - Note: Returns `nil` if `unsafeData.count` is not a multiple of + /// `MemoryLayout.stride`. + /// - Parameter unsafeData: The data containing the bytes to turn into an array. + init?(unsafeData: Data) { + guard unsafeData.count % MemoryLayout.stride == 0 else { return nil } + let elements = unsafeData.withUnsafeBytes { + UnsafeBufferPointer( + start: $0, + count: unsafeData.count / MemoryLayout.stride + ) + } + self.init(elements) + } +} + +extension Data { + /// Creates a new buffer by copying the buffer pointer of the given array. + /// + /// - Warning: The given array's element type `T` must be trivial in that it can be copied bit + /// for bit with no indirection or reference-counting operations; otherwise, reinterpreting + /// data from the resulting buffer has undefined behavior. + /// - Parameter array: An array with elements of type `T`. + init(copyingBufferOf array: [T]) { + self = array.withUnsafeBufferPointer(Data.init) + } +} diff --git a/tensorflow/lite/experimental/swift/Tests/ModelTests.swift b/tensorflow/lite/experimental/swift/Tests/ModelTests.swift new file mode 100644 index 0000000000..025db18906 --- /dev/null +++ b/tensorflow/lite/experimental/swift/Tests/ModelTests.swift @@ -0,0 +1,59 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +@testable import TensorFlowLite +import XCTest + +class ModelTests: XCTestCase { + + var modelPath: String! 
+ + override func setUp() { + super.setUp() + + let bundle = Bundle(for: type(of: self)) + guard let modelPath = bundle.path( + forResource: Constant.modelInfo.name, + ofType: Constant.modelInfo.extension) + else { + XCTFail("Failed to get the model file path.") + return + } + self.modelPath = modelPath + } + + override func tearDown() { + modelPath = nil + + super.tearDown() + } + + func testModel_InitWithFilePath() { + XCTAssertNotNil(Model(filePath: modelPath)) + } + + func testModel_InitWithEmptyFilePath_FailsInitialization() { + XCTAssertNil(Model(filePath: "")) + } + + func testModel_InitWithInvalidFilePath_FailsInitialization() { + XCTAssertNil(Model(filePath: "invalid/path")) + } +} + +// MARK: - Constants + +private enum Constant { + static let modelInfo = (name: "add", extension: "bin") +} diff --git a/tensorflow/lite/experimental/swift/Tests/QuantizationParametersTests.swift b/tensorflow/lite/experimental/swift/Tests/QuantizationParametersTests.swift new file mode 100644 index 0000000000..65648c2698 --- /dev/null +++ b/tensorflow/lite/experimental/swift/Tests/QuantizationParametersTests.swift @@ -0,0 +1,43 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +@testable import TensorFlowLite +import XCTest + +class QuantizationParametersTests: XCTestCase { + + func testQuantizationParameters_InitWithCustomValues() { + let parameters = QuantizationParameters(scale: 0.5, zeroPoint: 1) + XCTAssertEqual(parameters.scale, 0.5) + XCTAssertEqual(parameters.zeroPoint, 1) + } + + func testQuantizationParameters_Equatable() { + let parameters1 = QuantizationParameters(scale: 0.5, zeroPoint: 1) + let parameters2 = QuantizationParameters(scale: 0.5, zeroPoint: 1) + XCTAssertEqual(parameters1, parameters2) + + let parameters3 = QuantizationParameters(scale: 0.4, zeroPoint: 1) + XCTAssertNotEqual(parameters1, parameters3) + XCTAssertNotEqual(parameters2, parameters3) + } +} + +// MARK: - Extensions + +extension QuantizationParameters: Equatable { + public static func == (lhs: QuantizationParameters, rhs: QuantizationParameters) -> Bool { + return lhs.scale == rhs.scale && lhs.zeroPoint == rhs.zeroPoint + } +} diff --git a/tensorflow/lite/experimental/swift/Tests/TensorTests.swift b/tensorflow/lite/experimental/swift/Tests/TensorTests.swift new file mode 100644 index 0000000000..4540043a16 --- /dev/null +++ b/tensorflow/lite/experimental/swift/Tests/TensorTests.swift @@ -0,0 +1,83 @@ +// Copyright 2018 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +@testable import TensorFlowLite +import XCTest + +class TensorTests: XCTestCase { + + // MARK: - Tensor + + func testTensor_Init() { + let name = "InputTensor" + let dataType: TensorDataType = .uInt8 + let shape = TensorShape(Constant.dimensions) + guard let data = name.data(using: .utf8) else { XCTFail("Data should not be nil."); return } + let quantizationParameters = QuantizationParameters(scale: 0.5, zeroPoint: 1) + let inputTensor = Tensor( + name: name, + dataType: dataType, + shape: shape, + data: data, + quantizationParameters: quantizationParameters + ) + XCTAssertEqual(inputTensor.name, name) + XCTAssertEqual(inputTensor.dataType, dataType) + XCTAssertEqual(inputTensor.shape, shape) + XCTAssertEqual(inputTensor.data, data) + XCTAssertEqual(inputTensor.quantizationParameters, quantizationParameters) + } + + // MARK: - TensorShape + + func testTensorShape_InitWithArray() { + let shape = TensorShape(Constant.dimensions) + XCTAssertEqual(shape.rank, Constant.dimensions.count) + XCTAssertEqual(shape.dimensions, Constant.dimensions) + } + + func testTensorShape_InitWithElements() { + let shape = TensorShape(2, 2, 3) + XCTAssertEqual(shape.rank, Constant.dimensions.count) + XCTAssertEqual(shape.dimensions, Constant.dimensions) + } + + func testTensorShape_InitWithArrayLiteral() { + let shape: TensorShape = [2, 2, 3] + XCTAssertEqual(shape.rank, Constant.dimensions.count) + XCTAssertEqual(shape.dimensions, Constant.dimensions) + } +} + +// MARK: - Constants + +private enum Constant { + /// Array of 2 arrays of 2 arrays of 3 numbers: [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]. 
+ static let dimensions = [2, 2, 3] +} + +// MARK: - Extensions + +extension TensorShape: Equatable { + public static func == (lhs: TensorShape, rhs: TensorShape) -> Bool { + return lhs.rank == rhs.rank && lhs.dimensions == rhs.dimensions + } +} + +extension Tensor: Equatable { + public static func == (lhs: Tensor, rhs: Tensor) -> Bool { + return lhs.name == rhs.name && lhs.dataType == rhs.dataType && lhs.shape == rhs.shape && + lhs.data == rhs.data && lhs.quantizationParameters == rhs.quantizationParameters + } +} diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py index 903a7f9773..3bcc4fc81b 100644 --- a/tensorflow/tools/pip_package/pip_smoke_test.py +++ b/tensorflow/tools/pip_package/pip_smoke_test.py @@ -34,6 +34,7 @@ PIP_PACKAGE_QUERY_EXPRESSION = ( # pip smoke test. BUILD_BLACKLIST = [ "tensorflow/lite/examples/android", + "tensorflow/lite/experimental/objc", "tensorflow/lite/experimental/swift", ] -- GitLab From 2caa84e53bca411ba8ab6f8aa71327fc047c37fd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 16:46:09 -0800 Subject: [PATCH 168/351] Adding additional test flags. PiperOrigin-RevId: 234053954 --- tensorflow/tools/ci_build/builds/pip_new.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/builds/pip_new.sh b/tensorflow/tools/ci_build/builds/pip_new.sh index 079ba90043..b8b693eb45 100755 --- a/tensorflow/tools/ci_build/builds/pip_new.sh +++ b/tensorflow/tools/ci_build/builds/pip_new.sh @@ -553,7 +553,7 @@ run_test_with_bazel() { fi # Run the test. 
- bazel test ${BAZEL_BUILD_FLAGS} ${BAZEL_PARALLEL_TEST_FLAGS} --test_tag_filters=${BAZEL_TEST_FILTER_TAGS} -- ${BAZEL_TEST_TARGETS} + bazel test --build_tests_only ${BAZEL_BUILD_FLAGS} ${BAZEL_PARALLEL_TEST_FLAGS} --test_tag_filters=${BAZEL_TEST_FILTER_TAGS} -- ${BAZEL_TEST_TARGETS} } run_all_tests() { -- GitLab From 47694b7e4594bef65812c7163761ec0fbdee2631 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Thu, 14 Feb 2019 17:00:23 -0800 Subject: [PATCH 169/351] [XLA:CLIENT] Implement a varient of gather equivalent to torch.gather (https://pytorch.org/docs/stable/torch.html#torch.gather). PiperOrigin-RevId: 234056021 --- tensorflow/compiler/xla/client/lib/slicing.cc | 27 +++++++++++++++++++ tensorflow/compiler/xla/client/lib/slicing.h | 14 ++++++++++ .../compiler/xla/client/lib/slicing_test.cc | 13 +++++++++ 3 files changed, 54 insertions(+) diff --git a/tensorflow/compiler/xla/client/lib/slicing.cc b/tensorflow/compiler/xla/client/lib/slicing.cc index 77145ba7d4..d7b33c5af2 100644 --- a/tensorflow/compiler/xla/client/lib/slicing.cc +++ b/tensorflow/compiler/xla/client/lib/slicing.cc @@ -134,4 +134,31 @@ XlaOp DynamicUpdateSliceInMinorDims(XlaOp x, XlaOp update, }); } +XlaOp TorchGather(XlaOp input, XlaOp index, int64 dim) { + XlaBuilder* builder = input.builder(); + return builder->ReportErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(Shape index_shape, builder->GetShape(index)); + ShapeUtil::AppendMajorDimension(1, &index_shape); + std::vector to_concat; + TF_ASSIGN_OR_RETURN(Shape input_shape, builder->GetShape(input)); + to_concat.reserve(input_shape.rank()); + for (int64 i = 0; i < input_shape.rank(); ++i) { + if (i == dim) { + to_concat.push_back(Reshape(index, index_shape.dimensions())); + } else { + to_concat.push_back(Iota(builder, index_shape, i)); + } + } + XlaOp gather_indices = ConcatInDim(builder, to_concat, input_shape.rank()); + std::vector slice_sizes(input_shape.rank(), 1); + GatherDimensionNumbers gather_dnums; + 
gather_dnums.set_index_vector_dim(input_shape.rank()); + for (int64 i = 0; i < input_shape.rank(); ++i) { + gather_dnums.add_collapsed_slice_dims(i); + gather_dnums.add_start_index_map(i); + } + return Gather(input, gather_indices, gather_dnums, slice_sizes); + }); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/client/lib/slicing.h b/tensorflow/compiler/xla/client/lib/slicing.h index 6c482a38b5..69f98a6f43 100644 --- a/tensorflow/compiler/xla/client/lib/slicing.h +++ b/tensorflow/compiler/xla/client/lib/slicing.h @@ -43,6 +43,20 @@ XlaOp DynamicSliceInMinorDims(XlaOp x, absl::Span starts, XlaOp DynamicUpdateSliceInMinorDims(XlaOp x, XlaOp update, absl::Span starts); +// Gathers values along an axis specified by dim. +// +// For a 3-D tensor the output is specified by: +// +// out[i][j][k] = input[index[i][j][k]][j][k] # if dim == 0 +// out[i][j][k] = input[i][index[i][j][k]][k] # if dim == 1 +// out[i][j][k] = input[i][j][index[i][j][k]] # if dim == 2 +// +// If `input` is an n-dimensional tensor with size +// [X0,X1,X2,...,XN] and dim = i, `index` must be an n-dimensional tensor with size +// [X0,X1,...,Y,Xi+1,...,XN] where Y >= 1, and `out` will have the same sizes as +// `index`.
+XlaOp TorchGather(XlaOp input, XlaOp index, int64 dim); + } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_CLIENT_LIB_SLICING_H_ diff --git a/tensorflow/compiler/xla/client/lib/slicing_test.cc b/tensorflow/compiler/xla/client/lib/slicing_test.cc index 8d362119e0..db6ebb9df1 100644 --- a/tensorflow/compiler/xla/client/lib/slicing_test.cc +++ b/tensorflow/compiler/xla/client/lib/slicing_test.cc @@ -102,5 +102,18 @@ XLA_TEST_F(SlicingTest, SimpleSliceUpdate) { {a_data.get(), b_data.get(), x_data.get(), y_data.get()}); } +XLA_TEST_F(SlicingTest, TorchGather) { + xla::XlaBuilder builder(TestName()); + + xla::XlaOp input, index; + auto input_data = + CreateR2Parameter({{1, 2}, {3, 4}}, 0, "input", &builder, &input); + auto index_data = + CreateR2Parameter({{0, 0}, {1, 0}}, 1, "index", &builder, &index); + TorchGather(input, index, 1); + + ComputeAndCompareR2(&builder, {{1, 1}, {4, 3}}, + {input_data.get(), index_data.get()}); +} } // namespace } // namespace xla -- GitLab From eba7103aab6aa5b51f8f2de76dbbf9ced9d1c835 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 17:24:10 -0800 Subject: [PATCH 170/351] [tensor tracer] Extending tracing support for non-estimator models. PiperOrigin-RevId: 234059243 --- .../contrib/tpu/python/tpu/tensor_tracer.py | 17 ++++++++++++++++- tensorflow/contrib/tpu/python/tpu/tpu.py | 11 +++++++++++ .../contrib/tpu/python/tpu/tpu_estimator.py | 8 -------- .../contrib/tpu/python/tpu/training_loop.py | 17 +++++++++++++---- 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py b/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py index 43b9168ecc..ae05822084 100644 --- a/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py +++ b/tensorflow/contrib/tpu/python/tpu/tensor_tracer.py @@ -237,7 +237,8 @@ class TensorTracer(object): (2) which Ops to be traced (via op.name or op.type) (3) output trace file path. 
""" - + # The set of graphs that are rewritten by tensor tracer. + _traced_graphs = set() @staticmethod def _match_next_flag(flags, pos): """Returns the match for the next TensorTracer flag. @@ -1559,6 +1560,12 @@ class TensorTracer(object): RuntimeError: If tensor_fetches is None or empty. """ + if graph in TensorTracer._traced_graphs: + logging.warning('Graph is already rewritten with tensor tracer, ignoring ' + 'multiple calls.') + return tensor_fetches + else: + TensorTracer._traced_graphs.add(graph) self._device_type = _DEVICE_TYPE_TPU self._num_replicas = num_replicas self._num_replicas_per_host = num_replicas_per_host @@ -1604,6 +1611,14 @@ class TensorTracer(object): Raises: RuntimeError: If tensor_fetches is None or empty. """ + + if graph in TensorTracer._traced_graphs: + logging.warning('Graph is already rewritten with tensor tracer, ignoring ' + 'multiple calls.') + return tensor_fetches + else: + TensorTracer._traced_graphs.add(graph) + self._device_type = _DEVICE_TYPE_CPU self._num_replicas = 1 self._num_replicas_per_host = 1 diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index 673129b4be..6f5cc00c9d 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -903,6 +903,17 @@ def split_compile_and_replicate(computation, else: output_tensors, control_deps = _postprocess_non_flat_outputs(outputs) + # tensor_tracer imports tpu.py. 
Local import to tensor_tracer to avoid + # import-cycle + # pylint: disable=g-import-not-at-top + from tensorflow.contrib.tpu.python.tpu import tensor_tracer + # pylint: enable=g-import-not-at-top + if tensor_tracer.TensorTracer.is_enabled(): + tt = tensor_tracer.TensorTracer() + output_tensors = tt.trace_tpu(ops.get_default_graph(), + output_tensors, control_deps, + num_replicas) + context.ExitResult(output_tensors) finally: context.report_unsupported_operations() diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index afe0a04d3b..3066f0bcd8 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1446,14 +1446,6 @@ class _ModelFnWrapper(object): captured_training_hooks.capture(estimator_spec.training_hooks) - if tensor_tracer.TensorTracer.is_enabled(): - tt = tensor_tracer.TensorTracer() - loss = tt.trace_tpu(ops.get_default_graph(), - loss, train_op, - self._ctx.num_replicas, - self._ctx.num_of_replicas_per_host, - self._ctx.num_hosts) - if self._ctx.embedding_config is None: apply_sparse_grads = [] else: diff --git a/tensorflow/contrib/tpu/python/tpu/training_loop.py b/tensorflow/contrib/tpu/python/tpu/training_loop.py index 0187b4bec6..50848e83f0 100644 --- a/tensorflow/contrib/tpu/python/tpu/training_loop.py +++ b/tensorflow/contrib/tpu/python/tpu/training_loop.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.compiler import xla +from tensorflow.contrib.tpu.python.tpu import tensor_tracer from tensorflow.contrib.tpu.python.tpu import tpu_function from tensorflow.python.framework import ops @@ -157,10 +158,18 @@ def while_loop(condition, body, inputs=None, infeed_queue=None, name=None): # TODO(phawkins): in principle this is too restrictive since it serializes # the training loop steps. In practice it does not matter since this loop # will be compiled by XLA. 
- return control_flow_ops.tuple(output_tensors, - control_inputs=output_operations) - else: - return output_tensors + output_tensors = control_flow_ops.tuple(output_tensors, + control_inputs=output_operations) + + if tensor_tracer.TensorTracer.is_enabled(): + num_replicas = tpu_function.get_tpu_context().number_of_shards + if num_replicas is None: + num_replicas = 1 + tt = tensor_tracer.TensorTracer() + output_tensors = tt.trace_tpu(ops.get_default_graph(), + output_tensors, None, + num_replicas) + return output_tensors # If the body has arity 0, add a dummy loop-carried value to which we can add # control dependencies from any side-effecting operations. -- GitLab From 51a37c2512a177c07484bc3ab21303cb6de5f2c4 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 14 Feb 2019 17:55:04 -0800 Subject: [PATCH 171/351] Internal change. PiperOrigin-RevId: 234063110 --- tensorflow/tools/api/tests/api_compatibility_test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index dad798c838..fe52a2bde7 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -78,6 +78,13 @@ _UPDATE_WARNING_FILE = 'tensorflow/tools/api/tests/API_UPDATE_WARNING.txt' _NON_CORE_PACKAGES = ['estimator'] +# TODO(annarev): remove this once we test with newer version of +# estimator that actually has compat v1 version. +if not hasattr(tf.compat.v1, 'estimator'): + tf.compat.v1.estimator = tf.estimator + tf.compat.v2.estimator = tf.estimator + + def _KeyToFilePath(key, api_version): """From a given key, construct a filepath. -- GitLab From f855ea1ca6cdd10ae769ad2a44a3078cee984582 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 18:09:52 -0800 Subject: [PATCH 172/351] C model code (non-depth-mult) for optimized depthwise conv, part II. 
PiperOrigin-RevId: 234064960 --- .../internal/depthwiseconv_quantized_test.cc | 30 +- .../depthwiseconv_uint8_transitional.h | 457 +++++++++++++++++- 2 files changed, 479 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc b/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc index a990e57cdf..f46f185510 100644 --- a/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc +++ b/tensorflow/lite/kernels/internal/depthwiseconv_quantized_test.cc @@ -36,6 +36,7 @@ limitations under the License. namespace tflite { namespace { +using optimized_ops::depthwise_conv::DotProduct3x3KernelType; using ::testing::Bool; using ::testing::Values; @@ -138,11 +139,24 @@ inline void DispatchDepthwiseConv( #endif } case DepthwiseConvImplementation::kUseNeon3x3DotProduct: - case DepthwiseConvImplementation::kUseCModel3x3DotProduct: case DepthwiseConvImplementation::kUseUnwound3x3DotProduct: case DepthwiseConvImplementation::kUseIntrinsics3x3DotProduct: // TODO(b/118426582) Placeholder for future dispatches. 
break; + case DepthwiseConvImplementation::kUseCModel3x3DotProduct: { + DotProduct3x3KernelType kernel_type = + optimized_ops::depthwise_conv::CategorizeDotProductKernel(params); + + ASSERT_TRUE(kernel_type == DotProduct3x3KernelType::kPlain || + kernel_type == DotProduct3x3KernelType::kStride2) + << "Kernel type = " << static_cast(kernel_type); + + optimized_ops::depthwise_conv::DepthwiseConvDotProduct3x3< + DepthwiseConvImplementation::kUseCModel3x3DotProduct>( + params, input_shape, input_data, filter_shape, filter_data, + bias_shape, bias_data, output_shape, output_data); + return; + } case DepthwiseConvImplementation::kUseGenericKernel: { optimized_ops::depthwise_conv::DepthwiseConvGeneral( params, input_shape, input_data, filter_shape, filter_data, @@ -622,5 +636,19 @@ INSTANTIATE_TEST_SUITE_P( ), TestParam::TestNameSuffix); +INSTANTIATE_TEST_SUITE_P( + CModel, DepthwiseConvTest, + testing::Combine( + Values(DepthwiseConvImplementation:: + kUseCModel3x3DotProduct), // forced_invocation + Values(1000), // tests_to_run + Bool(), // test_stride + Bool(), // test_pad + Values(false), // test_depth_multiplier + Values(DepthwiseConvOutputRounding::kUpward), // output_rounding + Values(false) // loose_tolerance + ), + TestParam::TestNameSuffix); + } // namespace } // namespace tflite diff --git a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h index 148001f444..f7ab364b7f 100644 --- a/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h +++ b/tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_transitional.h @@ -52,7 +52,103 @@ struct ProcessPerDepth { }; template <> -struct ProcessPerDepth {}; +struct ProcessPerDepth { + // Filter data is provided as filter_block[3][3][depth/8][2][4]: height 3, + // width 3, sub-block 0 or 1, depth 4. 
Filter data is written as + // filter_bank[3][2][4][4]; height 3, sub-block, depth 4, width 4. + // + // Note that this rearrangement is much like that performed on input data when + // filling the workspace, and optimized versions will be similar. + static inline void FillFilterBank(int depth, const uint8* filter_block, + int8 filter_bank[3][2][4][4]) { + constexpr int kSymmetricZeroPoint = 128; + // Load filter data in, 8-bytes down depth / sub-block at a time. + // + // loaded_filter has dimensions height 3, width 4, sub-block 0 or 1, + // depth 4. + uint8 loaded_filter[3][4][2][4]; + for (int y = 0; y < 3; ++y) { + for (int x = 0; x < 3; ++x) { + memcpy(loaded_filter[y][x][0], &filter_block[3 * y * depth + x * depth], + 8); + } + // Pad the filter with symmetric representation of 0, so that the values + // become 0 when the zero-point is added below. Thus these filter taps are + // effectively disregarded in later filtering. + memset(loaded_filter[y][3][0], kSymmetricZeroPoint, 8); + } + for (int y = 0; y < 3; ++y) { + for (int z = 0; z < 4; ++z) { + for (int x = 0; x < 4; ++x) { + filter_bank[y][0][z][x] = + loaded_filter[y][x][0][z] - kSymmetricZeroPoint; + filter_bank[y][1][z][x] = + loaded_filter[y][x][1][z] - kSymmetricZeroPoint; + } + } + } + } + + // Adjust the bias (weights) data according to the input offset. + // + // The output calculation is + // out[h][w][d] = bias[d] + sum_ij (in[h+i][w+j][d] + in_offset) * + // (filter[i][j][d] + filter_offset) + // (where offsets are expressed as differences from 128). + // + // Since we cannot efficiently handle varying offsets / bias across the image, + // we insist on filter_offset = 0. + // + // This function calculates + // adjusted_bias[d] = bias[d] + sum_ij in_offset * filter[i][j][d] + // which accounts for input offset. If the bias is constant over the depth, + // the adjusted bias will vary.
+ static inline void AdjustBias(int32 input_offset, + const int8 filter_bank[3][2][4][4], + const int32* bias_data, + int32 adjusted_bias_block[2][4]) { + constexpr int kSymmetricZeroPoint = 128; + TFLITE_DCHECK_GE(input_offset, -255); + TFLITE_DCHECK_LE(input_offset, 0); + // For instance, if input_offset == -128, no adjustment is needed. + const int32 input_offset_difference = input_offset + kSymmetricZeroPoint; + + for (int s = 0; s < 2; ++s) { + for (int z = 0; z < 4; ++z) { + adjusted_bias_block[s][z] = bias_data[4 * s + z]; + for (int i = 0; i < 9; ++i) { + adjusted_bias_block[s][z] += + input_offset_difference * filter_bank[i % 3][s][z][i / 3]; + } + } + } + } + + static void Run(const uint8* filter_data, const int32* bias_data, + int8* shuffled_filter_data, int32* adjusted_bias_data, + const DepthwiseConvDotProdParams* function_params) { + constexpr int shuffled_filter_increment = 2 * 3 * 4 * 4; + const int depth = function_params->output_depth; + const int bias_increment = function_params->bias_increment; + const int32 input_offset = function_params->input_offset; + + int8 filter_bank[3][2][4][4]; + int32 adjusted_bias_block[2][4]; + + for (int j_depth = 0; j_depth < (depth >> 3); ++j_depth) { + FillFilterBank(depth, filter_data + 8 * j_depth, filter_bank); + AdjustBias(input_offset, filter_bank, + bias_data + 2 * j_depth * bias_increment, adjusted_bias_block); + + memcpy(shuffled_filter_data, filter_bank[0][0][0], + shuffled_filter_increment); + shuffled_filter_data += shuffled_filter_increment; + memcpy(adjusted_bias_data, adjusted_bias_block[0], + 8 * sizeof(adjusted_bias_block[0][0])); + adjusted_bias_data += 8; + } + } +}; // Copy a macro block of data from the input buffer into the workspace, // permuting data within each micro block. @@ -74,17 +170,199 @@ struct PackMacroBlock { // implementation rather than conforming to style. }; -// TODO(b/118877434) Placeholder, to be implemented in subsequent CL.
template struct PackMacroBlock { + // A straight copy of a macro block of input data into a scratch buffer. + // + // Requirement: depth_micro_repeats > 0. + static inline void CopyMacroBlock( + int32 height_block_number, int32 width_block_number, + const DepthwiseConvDotProdParams& function_params, + const uint8* input_block_data, int8* scratch_block_data) { + TFLITE_DCHECK_LE(max_padding, 1); + + // Strides. + // The input depth and count of micro blocks provide the width strides. + const int input_height_stride = function_params.input_height_stride; + const int workspace_height_stride = function_params.workspace_height_stride; + const int input_depth = function_params.input_depth; + const int depth_micro_repeats = function_params.depth_micro_repeats; + TFLITE_DCHECK_GT(depth_micro_repeats, 0); + + // Remaining iteration and dimension parameters. + // + // If width_overall_micro_repeats = input_width_micro_repeats + 1, then the + // final micro block is incomplete. + const int width_overall_micro_repeats = + function_params.input_width_overall_micro_repeats; + int input_width_micro_repeats = function_params.input_width_micro_repeats; + const int residual_width = function_params.residual_width; + const int block_height = function_params.inbound_block_height; + + const int padding_left = function_params.padding_left; + const int padding_right = function_params.padding_right; + const int padding_top = function_params.padding_top; + const int padding_bottom = function_params.padding_bottom; + + const bool leading_width_padding = + padding_left > 0 && width_block_number == 0; + const bool trailing_width_padding = + padding_right > 0 && + width_block_number == (function_params.width_macro_count - 1); + const bool leading_height_padding = + padding_top > 0 && height_block_number < 0; + const bool trailing_height_padding = + padding_bottom > 0 && + height_block_number == (function_params.height_macro_count - 1); + + // Modify the trailing case to reflect the input width. 
+ int input_residual_width = + input_width_micro_repeats < width_overall_micro_repeats ? residual_width + : 4; + if (trailing_width_padding) { + input_residual_width -= 1; + input_width_micro_repeats = width_overall_micro_repeats - 1; + } + + constexpr int kSymmetricZeroPoint = 128; + const int32 input_offset_difference = + function_params.input_offset + kSymmetricZeroPoint; + + // We load data into a temporary buffer and then save, to match subsequent + // processing. This will make it easier to combine stages into one ASM + // routine. + int8 tmp_load[4][2][4]; + + int copy_block_height = block_height; + if (leading_height_padding) { + memset(scratch_block_data, -input_offset_difference, + workspace_height_stride); + scratch_block_data += workspace_height_stride; + input_block_data += input_height_stride; + copy_block_height -= 1; + } + if (trailing_height_padding) { + copy_block_height -= 1; + } + + // The outer 3 loops go through all the micro blocks in a macro block. + for (int k_height = 0; k_height < copy_block_height; ++k_height) { + for (int j_width = 0; j_width < width_overall_micro_repeats; ++j_width) { + // Figure out division of work (available input vs trailing padding). + int adjusted_residual_width = + j_width == input_width_micro_repeats ? input_residual_width : 4; + + int start_width = 0; + if (leading_width_padding && j_width == 0) { + start_width = 1; + memset(tmp_load[0][0], -input_offset_difference, 8); + } + if (adjusted_residual_width < 4) { + for (int x = adjusted_residual_width; x < 4; ++x) { + memset(tmp_load[x][0], -input_offset_difference, 8); + } + } + + for (int i_depth = 0; i_depth < depth_micro_repeats; ++i_depth) { + // The inner 3 loops go through the sub-block, depth and width within + // each micro block. + + // Load, and apply symmetric offset. 
+ int8* scratch_data = + scratch_block_data + k_height * workspace_height_stride + + j_width * 4 * 8 + i_depth * 4 * 8 * width_overall_micro_repeats; + const uint8* input_data = input_block_data + + k_height * input_height_stride + + j_width * 4 * input_depth + i_depth * 8; + // Full-size macro blocks are 2*4*4 = 32 bytes. + for (int x = start_width; x < adjusted_residual_width; ++x) { + for (int s = 0; s < 2; ++s) { + for (int d = 0; d < 4; ++d) { + tmp_load[x][s][d] = input_data[x * input_depth + 4 * s + d] - + kSymmetricZeroPoint; + } + } + } + + // Save results. + memcpy(&scratch_data[0], tmp_load[0][0], 8); + memcpy(&scratch_data[8], tmp_load[1][0], 8); + memcpy(&scratch_data[16], tmp_load[2][0], 8); + memcpy(&scratch_data[24], tmp_load[3][0], 8); + } + } + } + + if (trailing_height_padding) { + memset(scratch_block_data + copy_block_height * workspace_height_stride, + -input_offset_difference, workspace_height_stride); + } + } + + // Transpose 4x4 blocks within each sub-micro-block. + // + // Implemented somewhat like NEON register manipulation, so that we can see + // equivalence of the two approaches. + static inline void MicroTransposeBlocks( + const DepthwiseConvDotProdParams& function_params, + int8* scratch_block_data) { + const int workspace_height_stride = function_params.workspace_height_stride; + const int width_overall_micro_repeats = + function_params.input_width_overall_micro_repeats; + const int depth_micro_repeats = function_params.depth_micro_repeats; + const int block_height = function_params.inbound_block_height; + + // Transpositions are 4x4, but doing 2 at a time is more efficient in the + // NEON code we are simulating. + int8 tmp_load[4][2][4]; // [width][sub-block][depth] + int8 tmp_transposed[4][2][4]; // [depth][sub-block][width] + int8 tmp_interleaved[2][4][4]; // [sub-block][depth][width] + + // The outer 3 loops go through all the micro blocks in a macro block. 
+ for (int k_height = 0; k_height < block_height; ++k_height) { + for (int j_width = 0; j_width < width_overall_micro_repeats; ++j_width) { + for (int i_depth = 0; i_depth < depth_micro_repeats; ++i_depth) { + int8* scratch_data = + scratch_block_data + k_height * workspace_height_stride + + j_width * 4 * 8 + i_depth * 4 * 8 * width_overall_micro_repeats; + // A. Load data + memcpy(tmp_load[0][0], &scratch_data[0], 8); + memcpy(tmp_load[1][0], &scratch_data[8], 8); + memcpy(tmp_load[2][0], &scratch_data[16], 8); + memcpy(tmp_load[3][0], &scratch_data[24], 8); + + // B. Simulate between-register transposition. + for (int x = 0; x < 4; ++x) { + for (int y = 0; y < 4; ++y) { + tmp_transposed[x][0][y] = tmp_load[y][0][x]; + tmp_transposed[x][1][y] = tmp_load[y][1][x]; + } + } + + // C. Simulate between-register interleaving. + for (int x = 0; x < 4; ++x) { + for (int y = 0; y < 4; ++y) { + tmp_interleaved[0][x][y] = tmp_transposed[x][0][y]; + tmp_interleaved[1][x][y] = tmp_transposed[x][1][y]; + } + } + // D. Simulate mangled storage arrangement. + memcpy(&scratch_data[0], tmp_interleaved[0][0], 16); + memcpy(&scratch_data[16], tmp_interleaved[1][0], 16); + } + } + } + } + static inline void Run(int32 height_block_number, int32 width_block_number, const uint8* input_block_data, int8* scratch_block_data, const DepthwiseConvDotProdParams* function_params) { - TFLITE_DCHECK(false); - return; + CopyMacroBlock(height_block_number, width_block_number, *function_params, + input_block_data, scratch_block_data); + MicroTransposeBlocks(*function_params, scratch_block_data); } }; @@ -119,17 +397,182 @@ struct KernelMacroBlock { // implementation rather than conforming to style. }; -// TODO(b/118877434) Placeholder, to be implemented in subsequent CL. +// Apply filter to macro block of input data and store results. +// +// Requirement: depth_micro_repeats > 0 || residual_depth > 0. 
template struct KernelMacroBlock { + // Construct a width-shifted combination of two input sub-blocks, effectively + // concatenating them. + // + // The filter is applied using sub-blocks. These are in the needed form for + // the first (width) offset. For subsequent offsets, the filter is applied to + // shifted and combined data. The concatenation and shifting herein is fairly + // straightforward, but in the optimized code is an area of creativity in + // design because NEON instructions do not directly support the required + // between-register permutation. + // + // In NEON optimized code, input data is grouped in 4-byte blocks. In order to + // move along the width for each output point calculation, data is shifted, in + // essence between two such blocks. + // + // selected_data has format height 3, depth 4, width 4. + // + // When the micro block is trailing (the last across the macro-block width), + // it would be illegal to load the right (next) block, and the no_right_block + // indicates this scenario. + static inline void ConcatenateInputSubBlocks(int offset, int sub_block, + int workspace_height_stride, + int width_micro_stride, + bool no_right_block, + const int8* input_block, + int8 selected_data[3][4][4]) { + TFLITE_DCHECK_GE(offset, 0); + TFLITE_DCHECK_LT(offset, 4); + + // The input banks have same format as selected_data. + int8 left_bank[3][4][4]; + int8 right_bank[3][4][4]; + + // Work through one slice, by row, at a time. + for (int k_height = 0; k_height < 3; ++k_height) { + // Simulate demangling of mangled storage arrangement. 
+ const int8* left_input_block = + &input_block[k_height * workspace_height_stride + sub_block * 2 * 8]; + memcpy(left_bank[k_height][0], left_input_block, 16); + if (no_right_block) { + memset(right_bank[k_height][0], 0, 16); + } else { + const int8* right_input_block = + &input_block[k_height * workspace_height_stride + + sub_block * 2 * 8 + width_micro_stride]; + memcpy(right_bank[k_height][0], right_input_block, 16); + } + for (int depth_index = 0; depth_index < 4; ++depth_index) { + memcpy(selected_data[k_height][depth_index], + &left_bank[k_height][depth_index][offset], 4 - offset); + memcpy(&selected_data[k_height][depth_index][4 - offset], + right_bank[k_height][depth_index], offset); + } + } + } + + // Straight implementation of 3x3 filter within sub-micro block. + static inline void Calculate3x3FilterOutput( + const DepthwiseConvDotProdParams& params, int sub_block, + const int8 selected_data[3][4][4], const int8 filter_bank[3][2][4][4], + const int32* bias_data, uint8 output_values[4]) { + const int32 output_activation_min = params.quantized_activation_min; + const int32 output_activation_max = params.quantized_activation_max; + const int32 output_multiplier = params.output_multiplier; + const int32 output_shift = params.output_shift; + const int32 output_offset = params.output_offset; + for (int d = 0; d < 4; ++d) { + int32 acc = 0; + for (int y = 0; y < 3; ++y) { + for (int x = 0; x < 4; ++x) { + int32 input_val = selected_data[y][d][x]; + int32 filter_val = filter_bank[y][sub_block][d][x]; + acc += filter_val * input_val; + } + } + acc += bias_data[d]; + acc = reference_ops::depthwise_conv::DepthwiseConvRound< + DepthwiseConvOutputRounding::kUpward>(acc, output_multiplier, + output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_values[d] = static_cast(acc); + } + } + static inline void Run(const int8* scratch_block_data, const int8* filter_workspace, const int32* 
bias_data, uint8* output_block_data, const DepthwiseConvDotProdParams* function_params) { - TFLITE_DCHECK(false); - return; + const int workspace_height_stride = + function_params->workspace_height_stride; + const int input_width_overall_micro_repeats = + function_params->input_width_overall_micro_repeats; + const int output_width_micro_repeats = + function_params->output_width_micro_repeats; + const int depth_micro_repeats = function_params->depth_micro_repeats; + const int depth = function_params->input_depth; + const int stride_val = function_params->stride; + const int four_over_stride = function_params->four_over_stride; + + const int workspace_width_micro_repeats = + function_params->workspace_width_micro_repeats; + const int output_width_overall_micro_repeats = + function_params->output_width_overall_micro_repeats; + const int block_height = function_params->outbound_block_height; + const int residual_width = function_params->output_residual_width; + const int output_height_stride = function_params->output_height_stride; + constexpr int bias_increment = 4; + TFLITE_DCHECK_EQ(function_params->bias_increment, bias_increment); + + TFLITE_DCHECK(depth_micro_repeats > 0); + const int width_micro_stride = 4 * 8; + const int depth_micro_stride = + width_micro_stride * input_width_overall_micro_repeats; + + constexpr int shuffled_filter_increment = 2 * 3 * 4 * 4; + + // Simulate NEON-register transposition of subset of filter. + int8 filter_bank[3][2][4][4]; // Height 3, sub-block, depth 4, width 4. + // Simulate NEON-register input data concatenation + sub-selection. + int8 sub_selected_input_data[3][4][4]; // Height 3, depth 4, width 4. + uint8 output_values[4]; // Depth 4. + + // The outer 3 loops go through all the micro blocks in a macro block, and + // separately treat the two sub-blocks within each micro block. 
+ for (int j_depth = 0; j_depth < depth_micro_repeats; ++j_depth) { + memcpy(filter_bank[0][0][0], + filter_workspace + j_depth * shuffled_filter_increment, + shuffled_filter_increment); + + for (int s = 0; s < 2; ++s) { + for (int k_height = 0; k_height < block_height; ++k_height) { + const int8* scratch_data = + scratch_block_data + + workspace_height_stride * k_height * stride_val + + depth_micro_stride * j_depth; + uint8* output_data = + output_block_data + output_height_stride * k_height + 8 * j_depth; + + for (int i_width = 0; i_width < output_width_overall_micro_repeats; + ++i_width) { + const int output_width = i_width == output_width_micro_repeats + ? residual_width + : four_over_stride; + const bool no_right_block = i_width == output_width_micro_repeats && + output_width_overall_micro_repeats == + workspace_width_micro_repeats; + TFLITE_DCHECK_LE(output_width * stride_val, 4); + const int8* input_data = + scratch_data + width_micro_stride * i_width; + // Iterate over input width shifts within sub-micro blocks. + for (int x = 0; x < output_width; ++x) { + ConcatenateInputSubBlocks(x * stride_val, s, + workspace_height_stride, + width_micro_stride, no_right_block, + input_data, sub_selected_input_data); + Calculate3x3FilterOutput( + *function_params, s, sub_selected_input_data, filter_bank, + bias_data + (2 * j_depth + s) * bias_increment, + output_values); + for (int d = 0; d < 4; ++d) { + output_data[depth * (four_over_stride * i_width + x) + 4 * s + + d] = output_values[d]; + } + } + } + } + } + } } }; -- GitLab From b23578d6056bee4d6572fc9bd430b44353e1fff0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 18:48:49 -0800 Subject: [PATCH 173/351] Implement Rank. 
PiperOrigin-RevId: 234068627 --- tensorflow/lite/build_def.bzl | 1 + tensorflow/lite/builtin_ops.h | 1 + tensorflow/lite/c/builtin_op_data.h | 3 + tensorflow/lite/c/builtin_op_data_test.cc | 1 + .../lite/core/api/flatbuffer_conversions.cc | 1 + .../writer/option_writer_generator.cc | 1 + tensorflow/lite/g3doc/tf_ops_compatibility.md | 11 ++ tensorflow/lite/kernels/BUILD | 14 ++ tensorflow/lite/kernels/rank.cc | 65 +++++++++ tensorflow/lite/kernels/rank_test.cc | 91 +++++++++++++ tensorflow/lite/kernels/register.cc | 2 + tensorflow/lite/nnapi_delegate.cc | 1 + tensorflow/lite/schema/schema.fbs | 5 + tensorflow/lite/schema/schema_generated.h | 124 +++++++++++++++++- tensorflow/lite/testing/generate_examples.py | 23 ++++ tensorflow/lite/toco/export_tensorflow.cc | 6 +- .../propagate_fixed_sizes.cc | 4 +- tensorflow/lite/toco/import_tensorflow.cc | 2 +- tensorflow/lite/toco/model.h | 13 +- tensorflow/lite/toco/tflite/operator.cc | 2 + tensorflow/lite/toco/tflite/operator_test.cc | 1 + 21 files changed, 354 insertions(+), 18 deletions(-) create mode 100644 tensorflow/lite/kernels/rank.cc create mode 100644 tensorflow/lite/kernels/rank_test.cc diff --git a/tensorflow/lite/build_def.bzl b/tensorflow/lite/build_def.bzl index 18da07bcfe..b69352481b 100644 --- a/tensorflow/lite/build_def.bzl +++ b/tensorflow/lite/build_def.bzl @@ -286,6 +286,7 @@ def generated_test_models(): "prelu", "pow", "range", + "rank", "reduce_any", "reduce_max", "reduce_min", diff --git a/tensorflow/lite/builtin_ops.h b/tensorflow/lite/builtin_ops.h index 3a42a60cb8..361d501672 100644 --- a/tensorflow/lite/builtin_ops.h +++ b/tensorflow/lite/builtin_ops.h @@ -135,6 +135,7 @@ typedef enum { kTfLiteBuiltinGatherNd = 107, kTfLiteBuiltinCos = 108, kTfLiteBuiltinWhere = 109, + kTfLiteBuiltinRank = 110, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/lite/c/builtin_op_data.h b/tensorflow/lite/c/builtin_op_data.h index 332c2db145..40fea17fef 100644 --- 
a/tensorflow/lite/c/builtin_op_data.h +++ b/tensorflow/lite/c/builtin_op_data.h @@ -333,6 +333,9 @@ typedef struct { TfLiteType out_type; } TfLiteShapeParams; +typedef struct { +} TfLiteRankParams; + typedef struct { // Parameters supported by version 1: float min; diff --git a/tensorflow/lite/c/builtin_op_data_test.cc b/tensorflow/lite/c/builtin_op_data_test.cc index 4ce7c481e1..4967183dd5 100644 --- a/tensorflow/lite/c/builtin_op_data_test.cc +++ b/tensorflow/lite/c/builtin_op_data_test.cc @@ -71,6 +71,7 @@ TEST(IntArray, CanCompileStructs) { TfLiteTransposeConvParams transpose_conv_params; TfLiteSparseToDenseParams sparse_to_dense_params; TfLiteShapeParams shape_params; + TfLiteRankParams rank_params; TfLiteFakeQuantParams fake_quant_params; TfLitePackParams pack_params; TfLiteOneHotParams one_hot_params; diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index 0224836135..09cef8da64 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -731,6 +731,7 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_ADD_N: case BuiltinOperator_GATHER_ND: case BuiltinOperator_WHERE: + case BuiltinOperator_RANK: break; } return kTfLiteOk; diff --git a/tensorflow/lite/experimental/writer/option_writer_generator.cc b/tensorflow/lite/experimental/writer/option_writer_generator.cc index 7381344613..317aaa77a3 100644 --- a/tensorflow/lite/experimental/writer/option_writer_generator.cc +++ b/tensorflow/lite/experimental/writer/option_writer_generator.cc @@ -179,6 +179,7 @@ class OpOptionData { op_to_option_["LOG"] = ""; op_to_option_["SQRT"] = ""; op_to_option_["RSQRT"] = ""; + op_to_option_["Rank"] = ""; // TODO(aselle): These are undesirable hacks. 
Consider changing C structs option_to_struct_["Pool2DOptions"] = "TfLitePoolParams"; diff --git a/tensorflow/lite/g3doc/tf_ops_compatibility.md b/tensorflow/lite/g3doc/tf_ops_compatibility.md index 5e2202cdef..d5b998df78 100644 --- a/tensorflow/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/lite/g3doc/tf_ops_compatibility.md @@ -725,6 +725,17 @@ Options { } ``` +**RANK** + +``` +Inputs { + 0: a tensor +} +Outputs { + 0: a 0-D int32 Tensor representing the rank of input +} +``` + **RELU** ``` diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index a87d4daf05..bf7dfb59f4 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -199,6 +199,7 @@ cc_library( "pooling.cc", "pow.cc", "range.cc", + "rank.cc", "reduce.cc", "reshape.cc", "resize_bilinear.cc", @@ -1096,6 +1097,19 @@ tf_cc_test( ], ) +tf_cc_test( + name = "rank_test", + size = "small", + srcs = ["rank_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/lite:framework", + "//tensorflow/lite/c:c_api_internal", + "//tensorflow/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "pow_test", size = "small", diff --git a/tensorflow/lite/kernels/rank.cc b/tensorflow/lite/kernels/rank.cc new file mode 100644 index 0000000000..8cef1f53a0 --- /dev/null +++ b/tensorflow/lite/kernels/rank.cc @@ -0,0 +1,65 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/lite/c/builtin_op_data.h" +#include "tensorflow/lite/c/c_api_internal.h" +#include "tensorflow/lite/kernels/internal/tensor.h" +#include "tensorflow/lite/kernels/kernel_util.h" +#include "tensorflow/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace rank { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + output->type = kTfLiteInt32; + + // Rank produces a 0-D int32 Tensor representing the rank of input. + TfLiteIntArray* output_size = TfLiteIntArrayCreate(0); + return context->ResizeTensor(context, output, output_size); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_EQ(context, NumDimensions(output), 0); + + if (output->type == kTfLiteInt32) { + int32_t* output_data = GetTensorData(output); + *output_data = NumDimensions(input); + } else { + return kTfLiteError; + } + + return kTfLiteOk; +} + +} // namespace rank + +TfLiteRegistration* Register_RANK() { + static TfLiteRegistration r = {nullptr, nullptr, rank::Prepare, rank::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/lite/kernels/rank_test.cc b/tensorflow/lite/kernels/rank_test.cc new file mode 100644 index 0000000000..3c31fc5866 --- /dev/null +++ b/tensorflow/lite/kernels/rank_test.cc @@ -0,0 +1,91 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/register.h" +#include "tensorflow/lite/kernels/test_util.h" +#include "tensorflow/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class RankOpModel : public SingleOpModel { + public: + RankOpModel(std::initializer_list input_shape, TensorType input_type) { + TensorType output_type = TensorType_INT32; + input_ = AddInput(input_type); + output_ = AddOutput(output_type); + SetBuiltinOp(BuiltinOperator_RANK, BuiltinOptions_RankOptions, + CreateRankOptions(builder_).Union()); + BuildInterpreter({input_shape}); + } + + TfLiteStatus InvokeWithResult() { return interpreter_->Invoke(); } + + int input() { return input_; } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + private: + int input_; + int output_; +}; + +TEST(RankOpTest, InputTypeFloat) { + RankOpModel model({1, 3, 1, 3, 5}, TensorType_FLOAT32); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({5})); + EXPECT_TRUE(model.GetOutputShape().empty()); +} + +TEST(RankOpTest, InputTypeInt) { + RankOpModel model({1, 3, 1, 3, 5}, TensorType_INT32); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({5})); + EXPECT_TRUE(model.GetOutputShape().empty()); +} + 
+TEST(RankOpTest, ScalarTensor) { + RankOpModel model({}, TensorType_FLOAT32); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({0})); + EXPECT_TRUE(model.GetOutputShape().empty()); +} + +TEST(RankOpTest, EmptyTensor) { + RankOpModel model({1, 0}, TensorType_FLOAT32); + model.Invoke(); + + EXPECT_THAT(model.GetOutput(), ElementsAreArray({2})); + EXPECT_TRUE(model.GetOutputShape().empty()); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index 58f0dbe9e4..07d13418e0 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -113,6 +113,7 @@ TfLiteRegistration* Register_NOT_EQUAL(); TfLiteRegistration* Register_SQRT(); TfLiteRegistration* Register_RSQRT(); TfLiteRegistration* Register_SHAPE(); +TfLiteRegistration* Register_RANK(); TfLiteRegistration* Register_POW(); TfLiteRegistration* Register_FAKE_QUANT(); TfLiteRegistration* Register_PACK(); @@ -336,6 +337,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_SQRT, Register_SQRT()); AddBuiltin(BuiltinOperator_RSQRT, Register_RSQRT()); AddBuiltin(BuiltinOperator_SHAPE, Register_SHAPE()); + AddBuiltin(BuiltinOperator_RANK, Register_RANK()); AddBuiltin(BuiltinOperator_POW, Register_POW()); AddBuiltin(BuiltinOperator_FAKE_QUANT, Register_FAKE_QUANT(), 1, 2); AddBuiltin(BuiltinOperator_PACK, Register_PACK(), diff --git a/tensorflow/lite/nnapi_delegate.cc b/tensorflow/lite/nnapi_delegate.cc index 065ae52b5e..8d90ec9e7c 100644 --- a/tensorflow/lite/nnapi_delegate.cc +++ b/tensorflow/lite/nnapi_delegate.cc @@ -668,6 +668,7 @@ TfLiteStatus AddOpsAndParams( case tflite::BuiltinOperator_ADD_N: case tflite::BuiltinOperator_GATHER_ND: case tflite::BuiltinOperator_WHERE: + case tflite::BuiltinOperator_RANK: logError("Op code %d is currently 
not delegated to NNAPI", builtin); return kTfLiteError; break; diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs index cf4d40aec6..5df56cce66 100644 --- a/tensorflow/lite/schema/schema.fbs +++ b/tensorflow/lite/schema/schema.fbs @@ -223,6 +223,7 @@ enum BuiltinOperator : byte { GATHER_ND = 107, COS = 108, WHERE = 109, + RANK = 110, } // Options for the builtin operators. @@ -312,6 +313,7 @@ union BuiltinOptions { GatherNdOptions, CosOptions, WhereOptions, + RankOptions, } enum Padding : byte { SAME, VALID } @@ -652,6 +654,9 @@ table ShapeOptions { out_type : TensorType; } +table RankOptions { +} + table PowOptions { } diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h index 4ddfc11e4c..e33f0a3653 100755 --- a/tensorflow/lite/schema/schema_generated.h +++ b/tensorflow/lite/schema/schema_generated.h @@ -217,6 +217,9 @@ struct NotEqualOptionsT; struct ShapeOptions; struct ShapeOptionsT; +struct RankOptions; +struct RankOptionsT; + struct PowOptions; struct PowOptionsT; @@ -545,11 +548,12 @@ enum BuiltinOperator { BuiltinOperator_GATHER_ND = 107, BuiltinOperator_COS = 108, BuiltinOperator_WHERE = 109, + BuiltinOperator_RANK = 110, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_WHERE + BuiltinOperator_MAX = BuiltinOperator_RANK }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[109] { +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[110] { static const BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -659,7 +663,8 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[109] { BuiltinOperator_ADD_N, BuiltinOperator_GATHER_ND, BuiltinOperator_COS, - BuiltinOperator_WHERE + BuiltinOperator_WHERE, + BuiltinOperator_RANK }; return values; } @@ -776,6 +781,7 @@ inline const char * const *EnumNamesBuiltinOperator() { "GATHER_ND", "COS", "WHERE", + "RANK", nullptr }; return names; @@ -873,11 
+879,12 @@ enum BuiltinOptions { BuiltinOptions_GatherNdOptions = 83, BuiltinOptions_CosOptions = 84, BuiltinOptions_WhereOptions = 85, + BuiltinOptions_RankOptions = 86, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_WhereOptions + BuiltinOptions_MAX = BuiltinOptions_RankOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[86] { +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[87] { static const BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -964,7 +971,8 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[86] { BuiltinOptions_AddNOptions, BuiltinOptions_GatherNdOptions, BuiltinOptions_CosOptions, - BuiltinOptions_WhereOptions + BuiltinOptions_WhereOptions, + BuiltinOptions_RankOptions }; return values; } @@ -1057,6 +1065,7 @@ inline const char * const *EnumNamesBuiltinOptions() { "GatherNdOptions", "CosOptions", "WhereOptions", + "RankOptions", nullptr }; return names; @@ -1411,6 +1420,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_WhereOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RankOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -2122,6 +2135,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_WhereOptions ? reinterpret_cast(value) : nullptr; } + RankOptionsT *AsRankOptions() { + return type == BuiltinOptions_RankOptions ? + reinterpret_cast(value) : nullptr; + } + const RankOptionsT *AsRankOptions() const { + return type == BuiltinOptions_RankOptions ? 
+ reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -6340,6 +6361,46 @@ inline flatbuffers::Offset CreateShapeOptions( flatbuffers::Offset CreateShapeOptions(flatbuffers::FlatBufferBuilder &_fbb, const ShapeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct RankOptionsT : public flatbuffers::NativeTable { + typedef RankOptions TableType; + RankOptionsT() { + } +}; + +struct RankOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef RankOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + RankOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(RankOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct RankOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit RankOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + RankOptionsBuilder &operator=(const RankOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateRankOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + RankOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct PowOptionsT : public flatbuffers::NativeTable { typedef PowOptions TableType; PowOptionsT() { @@ -7806,6 +7867,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : 
private flatbuffers::Table { const WhereOptions *builtin_options_as_WhereOptions() const { return builtin_options_type() == BuiltinOptions_WhereOptions ? static_cast(builtin_options()) : nullptr; } + const RankOptions *builtin_options_as_RankOptions() const { + return builtin_options_type() == BuiltinOptions_RankOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -8177,6 +8241,10 @@ template<> inline const WhereOptions *Operator::builtin_options_as return builtin_options_as_WhereOptions(); } +template<> inline const RankOptions *Operator::builtin_options_as() const { + return builtin_options_as_RankOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -10374,6 +10442,29 @@ inline flatbuffers::Offset CreateShapeOptions(flatbuffers::FlatBuf _out_type); } +inline RankOptionsT *RankOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new RankOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void RankOptions::UnPackTo(RankOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset RankOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateRankOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateRankOptions(flatbuffers::FlatBufferBuilder &_fbb, const RankOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RankOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateRankOptions( + _fbb); +} + inline PowOptionsT *PowOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = 
new PowOptionsT(); UnPackTo(_o, _resolver); @@ -11537,6 +11628,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_RankOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -11895,6 +11990,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_RankOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -12241,6 +12340,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateWhereOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_RankOptions: { + auto ptr = reinterpret_cast(value); + return CreateRankOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -12587,6 +12690,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new WhereOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_RankOptions: { + value = new RankOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -13019,6 +13126,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_RankOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; diff --git a/tensorflow/lite/testing/generate_examples.py b/tensorflow/lite/testing/generate_examples.py index fd1a60a8fc..e8a3efdef3 100644 --- a/tensorflow/lite/testing/generate_examples.py +++ b/tensorflow/lite/testing/generate_examples.py @@ -2264,6 +2264,29 @@ def make_shape_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +def make_rank_tests(zip_path): + """Make a set of tests to do rank.""" + + 
test_parameters = [{ + "input_dtype": [tf.float32, tf.int32], + "input_shape": [[], [0], [1, 1, 1, 3], [2, 3, 4, 5], [5, 5], [10]], + }] + + def build_graph(parameters): + """Build the rank op testing graph.""" + input_value = tf.placeholder(dtype=parameters["input_dtype"], name="input") + out = tf.rank(input_value) + return [input_value], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["input_dtype"], + parameters["input_shape"]) + return [input_value], sess.run( + outputs, feed_dict=dict(zip(inputs, [input_value]))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + def make_one_hot_tests(zip_path): """Make a set of tests to do one_hot.""" diff --git a/tensorflow/lite/toco/export_tensorflow.cc b/tensorflow/lite/toco/export_tensorflow.cc index 50a30f5162..c2952c7dd1 100644 --- a/tensorflow/lite/toco/export_tensorflow.cc +++ b/tensorflow/lite/toco/export_tensorflow.cc @@ -1305,7 +1305,8 @@ void ConvertTensorFlowShapeOperator(const Model& model, GetTensorFlowDataType(model, src_op.outputs[0])); } -void ConvertRankOperator(const Model& model, const RankOperator& src_op, +void ConvertRankOperator(const Model& model, + const TensorFlowRankOperator& src_op, GraphDef* tensorflow_graph) { tensorflow::NodeDef* rank_op = tensorflow_graph->add_node(); rank_op->set_op("Rank"); @@ -2274,7 +2275,8 @@ void ConvertOperator(const Model& model, const Operator& src_op, model, static_cast(src_op), tensorflow_graph); } else if (src_op.type == OperatorType::kRank) { - ConvertRankOperator(model, static_cast(src_op), + ConvertRankOperator(model, + static_cast(src_op), tensorflow_graph); } else if (src_op.type == OperatorType::kRange) { ConvertRangeOperator(model, static_cast(src_op), diff --git a/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 5d867ee634..866076467f 100644 --- 
a/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -1517,7 +1517,7 @@ void ProcessPadV2Operator(Model* model, PadV2Operator* op) { output_array.copy_shape(output_shape); } -void ProcessRankOperator(Model* model, RankOperator* op) { +void ProcessRankOperator(Model* model, TensorFlowRankOperator* op) { CHECK_GE(op->inputs.size(), 1); CHECK_EQ(op->outputs.size(), 1); auto& output_array = model->GetArray(op->outputs[0]); @@ -2219,7 +2219,7 @@ void ProcessUniqueOperator(Model* model, UniqueOperator* op) { ProcessRangeOperator(model, static_cast(op)); break; case OperatorType::kRank: - ProcessRankOperator(model, static_cast(op)); + ProcessRankOperator(model, static_cast(op)); break; case OperatorType::kShape: ProcessShapeOperator(model, static_cast(op)); diff --git a/tensorflow/lite/toco/import_tensorflow.cc b/tensorflow/lite/toco/import_tensorflow.cc index 033c84b002..deeda8229e 100644 --- a/tensorflow/lite/toco/import_tensorflow.cc +++ b/tensorflow/lite/toco/import_tensorflow.cc @@ -2472,7 +2472,7 @@ ConverterMapType GetTensorFlowNodeConverterMap() { {"Prod", ConvertReduceOperator}, {"RandomUniform", ConvertRandomUniform}, {"Range", ConvertRangeOperator}, - {"Rank", ConvertSimpleOperator}, + {"Rank", ConvertSimpleOperator}, {"RealDiv", ConvertSimpleOperator}, {"Relu", ConvertSimpleOperator}, {"Relu6", ConvertSimpleOperator}, diff --git a/tensorflow/lite/toco/model.h b/tensorflow/lite/toco/model.h index 05cc3c0e7e..63911899ae 100644 --- a/tensorflow/lite/toco/model.h +++ b/tensorflow/lite/toco/model.h @@ -24,11 +24,11 @@ limitations under the License. 
#include #include "absl/types/optional.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/lite/toco/model_flags.pb.h" #include "tensorflow/lite/toco/runtime/types.h" #include "tensorflow/lite/toco/toco_port.h" #include "tensorflow/lite/toco/toco_types.h" -#include "tensorflow/core/platform/logging.h" namespace toco { @@ -1259,13 +1259,12 @@ struct RangeOperator : Operator { // Inputs: // inputs[0]: required: the input array // -// This operation outputs a 0-D integer tensor representing the rank of -// the input. +// This operation outputs a 0-D int32 Tensor representing the rank of input. // -// TensorFlow equivalent: Rank. We currently assume that the output is int32 -// and not int64. The output type could be stored herein. -struct RankOperator : Operator { - RankOperator() : Operator(OperatorType::kRank) {} +// TensorFlow equivalent: Rank. +struct TensorFlowRankOperator : Operator { + TensorFlowRankOperator() : Operator(OperatorType::kRank) {} + ArrayDataType output_data_type = ArrayDataType::kInt32; }; // Element-wise negation (-x) operator. 
diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index e0cc0d7523..f15efbca72 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -2452,6 +2452,8 @@ std::vector> BuildOperatorList( MakeUnique>("FILL", OperatorType::kFill)); ops.push_back(MakeUnique>( "REVERSE_V2", OperatorType::kReverseV2)); + ops.push_back(MakeUnique>( + "RANK", OperatorType::kRank)); return ops; } } // namespace diff --git a/tensorflow/lite/toco/tflite/operator_test.cc b/tensorflow/lite/toco/tflite/operator_test.cc index 34faa1dce7..f8be13b096 100644 --- a/tensorflow/lite/toco/tflite/operator_test.cc +++ b/tensorflow/lite/toco/tflite/operator_test.cc @@ -154,6 +154,7 @@ TEST_F(OperatorTest, SimpleOperators) { CheckSimpleOperator("FILL", OperatorType::kFill); CheckSimpleOperator("REVERSE_V2", OperatorType::kReverseV2); + CheckSimpleOperator("RANK", OperatorType::kRank); } TEST_F(OperatorTest, BuiltinAdd) { -- GitLab From 082a349b260c3ceeab7c616700fbe19810191216 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 14 Feb 2019 18:49:45 -0800 Subject: [PATCH 174/351] Internal change. PiperOrigin-RevId: 234068718 --- tensorflow/python/keras/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 1ff5ced615..9e60d0f100 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -582,6 +582,10 @@ cuda_py_test( "//tensorflow/python:client_testlib", ], shard_count = 8, + tags = [ + "manual", # b/124471597 + "notap", # b/124471597 + ], xla_enable_strict_auto_jit = True, ) -- GitLab From b36a310b7d1dbe9bbcf65ea02b0164688a9b8143 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 14 Feb 2019 18:54:05 -0800 Subject: [PATCH 175/351] Internal change. 
PiperOrigin-RevId: 234069221 --- tensorflow/contrib/checkpoint/python/BUILD | 1 + tensorflow/python/kernel_tests/BUILD | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/contrib/checkpoint/python/BUILD b/tensorflow/contrib/checkpoint/python/BUILD index 4e529322c7..c83d8bcd6e 100644 --- a/tensorflow/contrib/checkpoint/python/BUILD +++ b/tensorflow/contrib/checkpoint/python/BUILD @@ -120,4 +120,5 @@ tf_py_test( "//tensorflow/python/keras:layers", "//tensorflow/python/training/checkpointable:util", ], + tags = ["nooss"], # b/124472244 ) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 999863add4..1f06fb100b 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -3098,6 +3098,7 @@ cuda_py_test( "//tensorflow/python:util", "//tensorflow/python:data_flow_ops", ], + tags = ["nooss"], # b/124474135 xla_enable_strict_auto_jit = True, ) -- GitLab From b466cc0e3b4f9a23800c22364499d6da97794d02 Mon Sep 17 00:00:00 2001 From: Gaurav Jain Date: Thu, 14 Feb 2019 18:58:53 -0800 Subject: [PATCH 176/351] Export nest.is_sequence as tf.nest.is_nested PiperOrigin-RevId: 234069722 --- tensorflow/contrib/framework/__init__.py | 1 + tensorflow/python/util/nest.py | 14 +++++++++++++ tensorflow/python/util/nest_test.py | 20 +++++++++---------- .../tools/api/golden/v1/tensorflow.nest.pbtxt | 4 ++++ .../tools/api/golden/v2/tensorflow.nest.pbtxt | 4 ++++ .../tools/compatibility/tf_upgrade_v2.py | 2 ++ 6 files changed, 35 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 94fb35b334..063717f08a 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -127,6 +127,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['nest'] _nest_allowed_symbols = [ 'assert_same_structure', + 'is_nested', 'is_sequence', 'is_sequence_or_composite', 
'flatten', diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py index e930096142..e79e717b57 100644 --- a/tensorflow/python/util/nest.py +++ b/tensorflow/python/util/nest.py @@ -184,6 +184,20 @@ is_sequence = _pywrap_tensorflow.IsSequence is_sequence_or_composite = _pywrap_tensorflow.IsSequenceOrComposite +@tf_export("nest.is_nested") +def is_nested(seq): + """Returns true if its input is a collections.Sequence (except strings). + + Args: + seq: an input sequence. + + Returns: + True if the sequence is a not a string and is a collections.Sequence or a + dict. + """ + return is_sequence(seq) + + @tf_export("nest.flatten") def flatten(structure, expand_composites=False): """Returns a flat list from a given nested structure. diff --git a/tensorflow/python/util/nest_test.py b/tensorflow/python/util/nest_test.py index ec559bd2ab..0540f71f7a 100644 --- a/tensorflow/python/util/nest_test.py +++ b/tensorflow/python/util/nest_test.py @@ -231,17 +231,17 @@ class NestTest(parameterized.TestCase, test.TestCase): ["and", "goodbye", "again"]) @test_util.assert_no_new_pyobjects_executing_eagerly - def testIsSequence(self): - self.assertFalse(nest.is_sequence("1234")) - self.assertTrue(nest.is_sequence([1, 3, [4, 5]])) - self.assertTrue(nest.is_sequence(((7, 8), (5, 6)))) - self.assertTrue(nest.is_sequence([])) - self.assertTrue(nest.is_sequence({"a": 1, "b": 2})) - self.assertFalse(nest.is_sequence(set([1, 2]))) + def testIsNested(self): + self.assertFalse(nest.is_nested("1234")) + self.assertTrue(nest.is_nested([1, 3, [4, 5]])) + self.assertTrue(nest.is_nested(((7, 8), (5, 6)))) + self.assertTrue(nest.is_nested([])) + self.assertTrue(nest.is_nested({"a": 1, "b": 2})) + self.assertFalse(nest.is_nested(set([1, 2]))) ones = array_ops.ones([2, 3]) - self.assertFalse(nest.is_sequence(ones)) - self.assertFalse(nest.is_sequence(math_ops.tanh(ones))) - self.assertFalse(nest.is_sequence(np.ones((4, 5)))) + self.assertFalse(nest.is_nested(ones)) + 
self.assertFalse(nest.is_nested(math_ops.tanh(ones))) + self.assertFalse(nest.is_nested(np.ones((4, 5)))) @parameterized.parameters({"mapping_type": _CustomMapping}, {"mapping_type": dict}) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.nest.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.nest.pbtxt index ad5360e335..70bb6d760b 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.nest.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.nest.pbtxt @@ -8,6 +8,10 @@ tf_module { name: "flatten" argspec: "args=[\'structure\', \'expand_composites\'], varargs=None, keywords=None, defaults=[\'False\'], " } + member_method { + name: "is_nested" + argspec: "args=[\'seq\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "map_structure" argspec: "args=[\'func\'], varargs=structure, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.nest.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.nest.pbtxt index ad5360e335..70bb6d760b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.nest.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.nest.pbtxt @@ -8,6 +8,10 @@ tf_module { name: "flatten" argspec: "args=[\'structure\', \'expand_composites\'], varargs=None, keywords=None, defaults=[\'False\'], " } + member_method { + name: "is_nested" + argspec: "args=[\'seq\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "map_structure" argspec: "args=[\'func\'], varargs=structure, keywords=kwargs, defaults=None" diff --git a/tensorflow/tools/compatibility/tf_upgrade_v2.py b/tensorflow/tools/compatibility/tf_upgrade_v2.py index c2d7ab65f5..02f0e8401e 100644 --- a/tensorflow/tools/compatibility/tf_upgrade_v2.py +++ b/tensorflow/tools/compatibility/tf_upgrade_v2.py @@ -564,6 +564,8 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec): "tf.nest.assert_same_structure", "tf.contrib.framework.nest.flatten": "tf.nest.flatten", + "tf.contrib.framework.nest.is_sequence": + "tf.nest.is_nested", 
"tf.contrib.framework.nest.map_structure": "tf.nest.map_structure", "tf.contrib.framework.nest.pack_sequence_as": -- GitLab From 68b686af89da036281a16e60f4468b82c3880ac4 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Thu, 14 Feb 2019 19:23:36 -0800 Subject: [PATCH 177/351] Add support for passing list of lists to the `metrics` param in Keras `compile.` PiperOrigin-RevId: 234072399 --- tensorflow/python/keras/engine/training.py | 12 ++-- .../python/keras/engine/training_test.py | 72 +++++++++++++++++++ .../python/keras/engine/training_utils.py | 30 +++++--- 3 files changed, 99 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 5901f05e20..e3a54426b8 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -165,12 +165,14 @@ class Model(Network): passing a dictionary or a list of losses. The loss value that will be minimized by the model will then be the sum of all individual losses. - metrics: List of metrics to be evaluated by the model - during training and testing. - Typically you will use `metrics=['accuracy']`. + metrics: List of metrics to be evaluated by the model during training + and testing. Typically you will use `metrics=['accuracy']`. To specify different metrics for different outputs of a - multi-output model, you could also pass a dictionary, - such as `metrics={'output_a': 'accuracy'}`. + multi-output model, you could also pass a dictionary, such as + `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`. + You can also pass a list (len = len(outputs)) of lists of metrics + such as `metrics=[['accuracy'], ['accuracy', 'mse']]` or + `metrics=['accuracy', ['accuracy', 'mse']]`. loss_weights: Optional list or dictionary specifying scalar coefficients (Python floats) to weight the loss contributions of different model outputs. 
diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index b84b9cece8..c3a1de19ae 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -2260,6 +2260,67 @@ class TestTrainingWithMetrics(keras_parameterized.TestCase): model.evaluate(x_test, y_test, batch_size=5) self.assertEqual(self.evaluate(acc_obj.count), 10) + @keras_parameterized.run_with_all_model_types(exclude_models=['sequential']) + @keras_parameterized.run_all_keras_modes + def test_metrics_valid_compile_input_formats(self): + inp_1 = keras.layers.Input(shape=(1,), name='input_1') + inp_2 = keras.layers.Input(shape=(1,), name='input_2') + x = keras.layers.Dense(3, kernel_initializer='ones', trainable=False) + out_1 = keras.layers.Dense( + 1, kernel_initializer='ones', name='output_1', trainable=False) + out_2 = keras.layers.Dense( + 1, kernel_initializer='ones', name='output_2', trainable=False) + + branch_a = [inp_1, x, out_1] + branch_b = [inp_2, x, out_2] + model = testing_utils.get_multi_io_model(branch_a, branch_b) + + # list of metrics. + model.compile( + optimizer='rmsprop', + loss='mse', + metrics=[keras.metrics.MeanSquaredError()], + weighted_metrics=[keras.metrics.MeanSquaredError()], + run_eagerly=testing_utils.should_run_eagerly()) + + # list of list of metrics. + model.compile( + optimizer='rmsprop', + loss='mse', + metrics=[ + keras.metrics.MeanSquaredError(), + [keras.metrics.MeanSquaredError(), + keras.metrics.Accuracy()] + ], + weighted_metrics=[ + keras.metrics.MeanSquaredError(), + [keras.metrics.MeanSquaredError(), + keras.metrics.Accuracy()] + ], + run_eagerly=testing_utils.should_run_eagerly()) + + # dict of metrics. 
+ model.compile( + optimizer='rmsprop', + loss='mse', + metrics={ + 'output_1': + keras.metrics.MeanSquaredError(), + 'output_2': [ + keras.metrics.MeanSquaredError(), + keras.metrics.Accuracy() + ], + }, + weighted_metrics={ + 'output_1': + keras.metrics.MeanSquaredError(), + 'output_2': [ + keras.metrics.MeanSquaredError(), + keras.metrics.Accuracy() + ], + }, + run_eagerly=testing_utils.should_run_eagerly()) + @keras_parameterized.run_all_keras_modes def test_invalid_metrics(self): num_classes = 5 @@ -2277,6 +2338,17 @@ class TestTrainingWithMetrics(keras_parameterized.TestCase): metrics=metrics_module.CategoricalAccuracy(), run_eagerly=testing_utils.should_run_eagerly()) + inp = keras.layers.Input(shape=(1,)) + x = keras.layers.Dense(3, activation='relu')(inp) + out_1 = keras.layers.Dense(1, activation='sigmoid', name='output_1')(x) + out_2 = keras.layers.Dense(1, activation='sigmoid', name='output_2')(x) + model = keras.models.Model(inp, [out_1, out_2]) + with self.assertRaisesRegex( + ValueError, 'When passing a list of lists as `metrics`, ' + 'it should have one entry per model output. ' + 'The model has 2 outputs, but you passed metrics='): + model.compile('rmsprop', loss='mse', metrics=[['mse']]) + @keras_parameterized.run_all_keras_modes def test_metrics_masking(self): if testing_utils.should_run_eagerly(): diff --git a/tensorflow/python/keras/engine/training_utils.py b/tensorflow/python/keras/engine/training_utils.py index 24d2c2528f..16aaa51e55 100644 --- a/tensorflow/python/keras/engine/training_utils.py +++ b/tensorflow/python/keras/engine/training_utils.py @@ -525,7 +525,7 @@ def collect_per_output_metric_info(metrics, """Maps metric names and functions to model outputs. Arguments: - metrics: a list or dict of metric functions. + metrics: a list or a list of lists or a dict of metric functions. output_names: a list of the names (strings) of model outputs. output_shapes: a list of the shapes (strings) of model outputs. 
loss_fns: a list of the loss functions corresponding to the model outputs. @@ -551,20 +551,30 @@ def collect_per_output_metric_info(metrics, """ if not metrics: return [{} for _ in output_names] + if isinstance(metrics, list): - # we then apply all metrics to all outputs. - if len(output_names) > 1: - nested_metrics = [] - for _ in output_names: - nested_metrics.append([metrics_module.clone_metric(m) for m in metrics]) + any_sub_list = any(isinstance(m, list) for m in metrics) + if any_sub_list: + if len(metrics) != len(output_names): + raise ValueError('When passing a list of lists as `metrics`, ' + 'it should have one entry per model output. ' + 'The model has ' + str(len(output_names)) + + ' outputs, but you passed metrics=' + str(metrics)) + # User has provided a list of len = len(outputs). + nested_metrics = [generic_utils.to_list(m) for m in metrics] else: - nested_metrics = [metrics] + # If it is a single list we then apply all metrics to all outputs. + if len(output_names) > 1: + nested_metrics = [] + for _ in output_names: + nested_metrics.append( + [metrics_module.clone_metric(m) for m in metrics]) + else: + nested_metrics = [metrics] elif isinstance(metrics, dict): nested_metrics = [] for name in output_names: - output_metrics = metrics.get(name, []) - if not isinstance(output_metrics, list): - output_metrics = [output_metrics] + output_metrics = generic_utils.to_list(metrics.get(name, [])) nested_metrics.append(output_metrics) else: raise TypeError('Type of `metrics` argument not understood. ' -- GitLab From 43f47645a8dcae81f5fa626848c61b4464765531 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Thu, 14 Feb 2019 19:32:14 -0800 Subject: [PATCH 178/351] [Grappler] Add initial support for DT_QINT32, DT_QINT16, DT_QUINT16, DT_QINT8, and DT_QUINT8 to ConstantFolding. 
PiperOrigin-RevId: 234072895 --- tensorflow/core/grappler/BUILD | 5 +- .../core/grappler/costs/graph_properties.cc | 1 + tensorflow/core/grappler/op_types.cc | 8 +- tensorflow/core/grappler/op_types.h | 1 + tensorflow/core/grappler/optimizers/BUILD | 20 +- .../optimizers/arithmetic_optimizer_test.cc | 208 +------------- .../arithmetic_optimizer_test_utils.h | 236 ++++++++++++++++ .../grappler/optimizers/constant_folding.cc | 117 ++++++++ .../grappler/optimizers/constant_folding.h | 3 + tensorflow/core/grappler/utils.cc | 68 +++-- tensorflow/core/grappler/utils_test.cc | 267 ++++++++++++------ 11 files changed, 613 insertions(+), 321 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/arithmetic_optimizer_test_utils.h diff --git a/tensorflow/core/grappler/BUILD b/tensorflow/core/grappler/BUILD index 9fe699360f..77307708fa 100644 --- a/tensorflow/core/grappler/BUILD +++ b/tensorflow/core/grappler/BUILD @@ -1,7 +1,6 @@ licenses(["notice"]) # Apache 2.0 -load("//tensorflow:tensorflow.bzl", "tf_cc_test") -load("//tensorflow:tensorflow.bzl", "tf_cuda_library") +load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_cuda_library") cc_library( name = "op_types", @@ -45,6 +44,7 @@ tf_cc_test( "//tensorflow/core:tensor_testutil", "//tensorflow/core:test", "//tensorflow/core:test_main", + "@com_google_absl//absl/strings", ], ) @@ -71,7 +71,6 @@ cc_library( deps = [ ":graph_view", "//tensorflow/core:graph", - "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 8ec558be7d..6907988d08 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -481,6 +481,7 @@ bool IsNumericType(const DataType dtype) { DT_QINT8, DT_QUINT8, DT_QINT16, + DT_QUINT16, DT_QINT32, // Bool. 
DT_BOOL, diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 5d2fa4a45b..59400dc479 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -279,8 +279,8 @@ bool IsLogicalOr(const NodeDef& node) { return node.op() == "LogicalOr"; } bool IsMatMul(const NodeDef& node) { const auto& op = node.op(); - return op == "MatMul" || op == "BatchMatMul" || op == "QuantizedMatMul" || - op == "SparseMatMul"; + return op == "MatMul" || op == "BatchMatMul" || op == "SparseMatMul" || + IsQuantizedMatMul(node); } bool IsMax(const NodeDef& node) { return node.op() == "Max"; } @@ -350,6 +350,10 @@ bool IsPrint(const NodeDef& node) { bool IsProd(const NodeDef& node) { return node.op() == "Prod"; } +bool IsQuantizedMatMul(const NodeDef& node) { + return node.op() == "QuantizedMatMul" || node.op() == "QuantizedMatMulV2"; +} + bool IsQueue(const NodeDef& node) { return str_util::EndsWith(node.op(), "QueueV2"); } diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index bc1d8c15ac..bc1bb33772 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -106,6 +106,7 @@ bool IsPack(const NodeDef& node); bool IsPad(const NodeDef& node); bool IsPack(const NodeDef& node); bool IsPartitionedCall(const NodeDef& node); +bool IsQuantizedMatMul(const NodeDef& node); bool IsNeg(const NodeDef& node); bool IsNoOp(const NodeDef& node); bool IsNotEqual(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 9bb63a5f4e..af6fb13761 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -3,7 +3,6 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") load("//tensorflow:tensorflow.bzl", "tf_kernel_library") 
-load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") # Platform specific build config load( @@ -274,13 +273,29 @@ cc_library( ], ) +cc_library( + name = "arithmetic_optimizer_test_utils", + testonly = 1, + hdrs = [ + "arithmetic_optimizer_test_utils.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":arithmetic_optimizer", + ":constant_folding", + ":model_pruner", + "//tensorflow/core:test", + "//tensorflow/core/grappler/utils:grappler_test", + ], +) + tf_cuda_cc_test( name = "arithmetic_optimizer_test", size = "small", srcs = ["arithmetic_optimizer_test.cc"], deps = [ ":arithmetic_optimizer", - ":constant_folding", + ":arithmetic_optimizer_test_utils", ":model_pruner", "//tensorflow/cc:cc_ops", "//tensorflow/cc:cc_ops_internal", @@ -295,7 +310,6 @@ tf_cuda_cc_test( "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", - "//tensorflow/core/grappler/utils:grappler_test", ], ) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 1220aefecf..2778334622 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -20,10 +20,9 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" -#include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/optimizers/arithmetic_optimizer_test_utils.h" #include "tensorflow/core/grappler/optimizers/model_pruner.h" #include "tensorflow/core/grappler/utils.h" -#include "tensorflow/core/grappler/utils/grappler_test.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" @@ -92,211 +91,6 @@ void VerifyGraphsMatch(const GraphDef& original_graph, } } // namespace -class ArithmeticOptimizerTest : public GrapplerTest { - protected: - // Optimize a graph using ArithmeticOptimizer and prune all the nodes that no - // longer have any output consumers. - void OptimizeAndPrune(ArithmeticOptimizer* optimizer, GrapplerItem* item, - GraphDef* output) { - TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); - item->graph.Swap(output); - output->Clear(); - TF_EXPECT_OK(ModelPruner().Optimize(nullptr, *item, output)); - } - - // Run ArithmeticOptimizer twice to make sure the rewrite is idempotent. - void OptimizeTwice(ArithmeticOptimizer* optimizer, GrapplerItem* item, - GraphDef* output) { - TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); - item->graph.Swap(output); - output->Clear(); - TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); - } - - // Run ArithmeticOptimizer twice to make sure the rewrite is idempotent. - // Optionally run a constant folding pass before pruning. 
- void OptimizeTwiceAndPrune(ArithmeticOptimizer* optimizer, GrapplerItem* item, - GraphDef* output, bool const_folding = false) { - TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); - - item->graph.Swap(output); - output->Clear(); - TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); - - if (const_folding) { - item->graph.Swap(output); - output->Clear(); - TF_EXPECT_OK(ConstantFolding(/*cpu_device=*/nullptr) - .Optimize(nullptr, *item, output)); - } - - item->graph.Swap(output); - output->Clear(); - TF_EXPECT_OK(ModelPruner().Optimize(nullptr, *item, output)); - } - - // TODO(ezhulenev): Make private. After migration to stages each test - // should explicitly enable required optimization for tests isolation - void DisableAllStages(ArithmeticOptimizer* optimizer) { - ArithmeticOptimizer::ArithmeticOptimizerOptions options; - options.dedup_computations = false; - options.combine_add_to_addn = false; - options.convert_sqrt_div_to_rsqrt_mul = false; - options.convert_pow = false; - options.convert_log1p = false; - options.optimize_max_or_min_of_monotonic = false; - options.fold_conjugate_into_transpose = false; - options.fold_multiply_into_conv = false; - options.fold_transpose_into_matmul = false; - options.hoist_common_factor_out_of_aggregation = false; - options.hoist_cwise_unary_chains = false; - options.minimize_broadcasts = false; - options.remove_identity_transpose = false; - options.remove_involution = false; - options.remove_idempotent = false; - options.remove_redundant_bitcast = false; - options.remove_redundant_cast = false; - options.remove_redundant_reshape = false; - options.remove_negation = false; - options.remove_logical_not = false; - options.reorder_cast_like_and_value_preserving = false; - options.replace_mul_with_square = false; - options.simplify_aggregation = false; - options.unary_ops_composition = false; - optimizer->options_ = options; - } - - void DisableAddToAddNCombining(ArithmeticOptimizer* optimizer) { - 
optimizer->options_.combine_add_to_addn = false; - } - - void EnableOnlyAddToAddNCombining(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.combine_add_to_addn = true; - } - - void EnableOnlyFoldConjugateIntoTranspose(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.fold_conjugate_into_transpose = true; - } - - void EnableOnlyFoldMultipleIntoConv(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.fold_multiply_into_conv = true; - } - - void EnableOnlyFoldTransposeIntoMatMul(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.fold_transpose_into_matmul = true; - } - - void EnableOnlyHoistCommonFactor(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.hoist_common_factor_out_of_aggregation = true; - } - - void EnableOnlyMinimizeBroadcasts(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.minimize_broadcasts = true; - } - - void EnableOnlyRemoveIdentityTranspose(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.remove_identity_transpose = true; - } - - void EnableOnlyRemoveInvolution(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.remove_involution = true; - } - - void EnableOnlyRemoveRedundantBitcast(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.remove_redundant_bitcast = true; - } - - void EnableOnlyRemoveRedundantCast(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.remove_redundant_cast = true; - } - - void EnableOnlyRemoveRedundantReshape(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.remove_redundant_reshape = true; - } - - void EnableOnlyRemoveNegation(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.remove_negation 
= true; - } - - void EnableOnlyReorderCastAndTranspose(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.reorder_cast_like_and_value_preserving = true; - } - - void EnableOnlyReplaceMulWithSquare(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.replace_mul_with_square = true; - } - - void EnableOnlyHoistCWiseUnaryChains(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.hoist_cwise_unary_chains = true; - } - - void EnableOnlySqrtDivToRsqrtMul(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.convert_sqrt_div_to_rsqrt_mul = true; - } - - void EnableOnlyConvertPow(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.convert_pow = true; - } - - void EnableOnlyRemoveIdempotent(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.remove_idempotent = true; - } - - void EnableOnlyRemoveLogicalNot(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.remove_logical_not = true; - } - - void EnableOnlySimplifyAggregation(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.simplify_aggregation = true; - } - - void EnableOnlyLog1p(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.convert_log1p = true; - } - - void EnableOnlyOptimizeMaxOrMinOfMonotonic(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.optimize_max_or_min_of_monotonic = true; - } - - void EnableOnlyExpm1(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.convert_expm1 = true; - } - - void EnableOnlyUnaryOpsComposition(ArithmeticOptimizer* optimizer) { - DisableAllStages(optimizer); - optimizer->options_.unary_ops_composition = true; - } - - void EnableOnlyRemoveStackStridedSliceSameAxis( - ArithmeticOptimizer* optimizer) { - 
DisableAllStages(optimizer); - optimizer->options_.remove_stack_strided_slice_same_axis = true; - } -}; - TEST_F(ArithmeticOptimizerTest, NoOp) { // This trivial graph is so basic there's nothing to optimize. TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test_utils.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test_utils.h new file mode 100644 index 0000000000..94d0adc609 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test_utils.h @@ -0,0 +1,236 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_ARITHMETIC_OPTIMIZER_TEST_UTILS_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_ARITHMETIC_OPTIMIZER_TEST_UTILS_H_ + +#include "tensorflow/core/grappler/optimizers/arithmetic_optimizer.h" +#include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/optimizers/model_pruner.h" +#include "tensorflow/core/grappler/utils/grappler_test.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace tensorflow { +namespace grappler { + +class ArithmeticOptimizerTest : public GrapplerTest { + protected: + // Optimize a graph using ArithmeticOptimizer and prune all the nodes that no + // longer have any output consumers. 
+ void OptimizeAndPrune(ArithmeticOptimizer* optimizer, GrapplerItem* item, + GraphDef* output) { + TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); + item->graph.Swap(output); + output->Clear(); + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, *item, output)); + } + + // Run ArithmeticOptimizer twice to make sure the rewrite is idempotent. + void OptimizeTwice(ArithmeticOptimizer* optimizer, GrapplerItem* item, + GraphDef* output) { + TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); + item->graph.Swap(output); + output->Clear(); + TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); + } + + // Run ArithmeticOptimizer twice to make sure the rewrite is idempotent. + // Optionally run a constant folding pass before pruning. + void OptimizeTwiceAndPrune(ArithmeticOptimizer* optimizer, GrapplerItem* item, + GraphDef* output, bool const_folding = false) { + TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); + + item->graph.Swap(output); + output->Clear(); + TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); + + if (const_folding) { + item->graph.Swap(output); + output->Clear(); + TF_EXPECT_OK(ConstantFolding(/*cpu_device=*/nullptr) + .Optimize(nullptr, *item, output)); + } + + item->graph.Swap(output); + output->Clear(); + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, *item, output)); + } + + // TODO(ezhulenev): Make private. 
After migration to stages each test + // should explicitly enable required optimization for tests isolation + void DisableAllStages(ArithmeticOptimizer* optimizer) { + ArithmeticOptimizer::ArithmeticOptimizerOptions options; + options.dedup_computations = false; + options.combine_add_to_addn = false; + options.convert_sqrt_div_to_rsqrt_mul = false; + options.convert_pow = false; + options.convert_log1p = false; + options.optimize_max_or_min_of_monotonic = false; + options.fold_conjugate_into_transpose = false; + options.fold_multiply_into_conv = false; + options.fold_transpose_into_matmul = false; + options.hoist_common_factor_out_of_aggregation = false; + options.hoist_cwise_unary_chains = false; + options.minimize_broadcasts = false; + options.remove_identity_transpose = false; + options.remove_involution = false; + options.remove_idempotent = false; + options.remove_redundant_bitcast = false; + options.remove_redundant_cast = false; + options.remove_redundant_reshape = false; + options.remove_negation = false; + options.remove_logical_not = false; + options.reorder_cast_like_and_value_preserving = false; + options.replace_mul_with_square = false; + options.simplify_aggregation = false; + options.unary_ops_composition = false; + optimizer->options_ = options; + } + + void DisableAddToAddNCombining(ArithmeticOptimizer* optimizer) { + optimizer->options_.combine_add_to_addn = false; + } + + void EnableOnlyAddToAddNCombining(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.combine_add_to_addn = true; + } + + void EnableOnlyFoldConjugateIntoTranspose(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.fold_conjugate_into_transpose = true; + } + + void EnableOnlyFoldMultipleIntoConv(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.fold_multiply_into_conv = true; + } + + void EnableOnlyFoldTransposeIntoMatMul(ArithmeticOptimizer* optimizer) { + 
DisableAllStages(optimizer); + optimizer->options_.fold_transpose_into_matmul = true; + } + + void EnableOnlyHoistCommonFactor(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.hoist_common_factor_out_of_aggregation = true; + } + + void EnableOnlyMinimizeBroadcasts(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.minimize_broadcasts = true; + } + + void EnableOnlyRemoveIdentityTranspose(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_identity_transpose = true; + } + + void EnableOnlyRemoveInvolution(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_involution = true; + } + + void EnableOnlyRemoveRedundantBitcast(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_redundant_bitcast = true; + } + + void EnableOnlyRemoveRedundantCast(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_redundant_cast = true; + } + + void EnableOnlyRemoveRedundantReshape(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_redundant_reshape = true; + } + + void EnableOnlyRemoveNegation(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_negation = true; + } + + void EnableOnlyReorderCastAndTranspose(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.reorder_cast_like_and_value_preserving = true; + } + + void EnableOnlyReplaceMulWithSquare(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.replace_mul_with_square = true; + } + + void EnableOnlyHoistCWiseUnaryChains(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.hoist_cwise_unary_chains = true; + } + + void EnableOnlySqrtDivToRsqrtMul(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + 
optimizer->options_.convert_sqrt_div_to_rsqrt_mul = true; + } + + void EnableOnlyConvertPow(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.convert_pow = true; + } + + void EnableOnlyRemoveIdempotent(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_idempotent = true; + } + + void EnableOnlyRemoveLogicalNot(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_logical_not = true; + } + + void EnableOnlySimplifyAggregation(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.simplify_aggregation = true; + } + + void EnableOnlyLog1p(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.convert_log1p = true; + } + + void EnableOnlyOptimizeMaxOrMinOfMonotonic(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.optimize_max_or_min_of_monotonic = true; + } + + void EnableOnlyExpm1(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.convert_expm1 = true; + } + + void EnableOnlyUnaryOpsComposition(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.unary_ops_composition = true; + } + + void EnableOnlyRemoveStackStridedSliceSameAxis( + ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_stack_strided_slice_same_axis = true; + } +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_ARITHMETIC_OPTIMIZER_TEST_UTILS_H_ diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index e626943ee6..cf495eecf5 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -20,6 +20,7 @@ limitations under the License. 
#include #include "absl/strings/string_view.h" +#include "absl/strings/substitute.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/function.pb.h" @@ -37,6 +38,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/evaluation_utils.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/symbolic_shapes.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" @@ -185,6 +187,40 @@ bool IsDenormal(double x) { return !std::isnormal(x); } +float QuantizedTypeMinAsFloat(DataType data_type) { + switch (data_type) { + case DT_QINT8: + return Eigen::NumTraits::lowest(); + case DT_QUINT8: + return Eigen::NumTraits::lowest(); + case DT_QINT16: + return Eigen::NumTraits::lowest(); + case DT_QUINT16: + return Eigen::NumTraits::lowest(); + case DT_QINT32: + return Eigen::NumTraits::lowest(); + default: + return 0.0f; + } +} + +float QuantizedTypeMaxAsFloat(DataType data_type) { + switch (data_type) { + case DT_QINT8: + return Eigen::NumTraits::highest(); + case DT_QUINT8: + return Eigen::NumTraits::highest(); + case DT_QINT16: + return Eigen::NumTraits::highest(); + case DT_QUINT16: + return Eigen::NumTraits::highest(); + case DT_QINT32: + return Eigen::NumTraits::highest(); + default: + return 0.0f; + } +} + } // namespace ConstantFolding::ConstantFolding(RewriterConfig::Toggle opt_level, @@ -945,6 +981,11 @@ Status CreateConstantTensorAttrValue(DataType type, double value, SET_TENSOR_VAL_CASE(DT_UINT16, int32, int); SET_TENSOR_VAL_CASE(DT_INT8, int32, int); SET_TENSOR_VAL_CASE(DT_UINT8, int32, int); + SET_TENSOR_VAL_CASE(DT_QINT32, int32, int); + SET_TENSOR_VAL_CASE(DT_QINT16, int32, int); + SET_TENSOR_VAL_CASE(DT_QUINT16, int32, int); + SET_TENSOR_VAL_CASE(DT_QINT8, int32, int); + 
SET_TENSOR_VAL_CASE(DT_QUINT8, int32, int); SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool); default: return errors::InvalidArgument("Unsupported type: ", type); @@ -1085,6 +1126,8 @@ Status ConstantFolding::CreateNodeDef(const string& name, t->set_dtype(tensor->dtype()); tensor->shape().AsProto(t->mutable_tensor_shape()); } else { + // DT_HALF, DT_BFLOAT16, DT_QINT32, DT_QINT16, DT_QUINT16, DT_QINT8, + // DT_QUINT8 tensor->AsProtoTensorContent(t); encoded_size = t->tensor_content().size(); } @@ -1533,6 +1576,11 @@ bool ConstantFolding::IsOnes(const NodeDef& node) const { IS_ONES_CASE(DT_INT16); IS_ONES_CASE(DT_INT32); IS_ONES_CASE(DT_INT64); + IS_ONES_CASE(DT_QINT32); + IS_ONES_CASE(DT_QINT16); + IS_ONES_CASE(DT_QUINT16); + IS_ONES_CASE(DT_QINT8); + IS_ONES_CASE(DT_QUINT8); default: VLOG(1) << "Unsupported type " << DataTypeString(dtype); return false; @@ -1567,6 +1615,11 @@ bool ConstantFolding::IsZeros(const NodeDef& node) const { IS_ZEROS_CASE(DT_INT16); IS_ZEROS_CASE(DT_INT32); IS_ZEROS_CASE(DT_INT64); + IS_ZEROS_CASE(DT_QINT32); + IS_ZEROS_CASE(DT_QINT16); + IS_ZEROS_CASE(DT_QUINT16); + IS_ZEROS_CASE(DT_QINT8); + IS_ZEROS_CASE(DT_QUINT8); default: VLOG(1) << "Unsupported type " << DataTypeString(dtype); return false; @@ -2576,6 +2629,7 @@ Status ConstantFolding::SimplifyArithmeticOperations( *success = false; const bool is_mul = IsMul(*node) || IsLogicalAnd(*node); const bool is_matmul = IsMatMul(*node); + const bool is_quantized_matmul = IsQuantizedMatMul(*node); const bool is_add = IsAdd(*node) || IsBiasAdd(*node) || IsLogicalOr(*node); const bool is_sub = IsSub(*node); const bool is_any_div = IsAnyDiv(*node); @@ -2670,6 +2724,10 @@ Status ConstantFolding::SimplifyArithmeticOperations( if (!replace_op_status.ok()) { return replace_op_status; } else if (replace_succeed) { + if (is_quantized_matmul) { + TF_RETURN_IF_ERROR( + AddQuantizedMatMulMinMaxOutConstNodes(node, optimized_graph)); + } *success = true; return Status::OK(); } @@ -3237,6 +3295,65 @@ bool 
ConstantFolding::MergeConcat(const GraphProperties& properties, return true; } +Status ConstantFolding::AddQuantizedMatMulMinMaxOutConstNodes( + NodeDef* node, GraphDef* optimized_graph) { + auto add_quantized_out = [this, node, optimized_graph]( + const string& out_const_name, int index) { + NodeDef* out_node = optimized_graph->add_node(); + Tensor value(DT_FLOAT, TensorShape({})); + const bool is_min = index == 1; + const DataType type_attr = node->attr().at("dtype").type(); + + value.flat()(0) = is_min ? QuantizedTypeMinAsFloat(type_attr) + : QuantizedTypeMaxAsFloat(type_attr); + TF_RETURN_IF_ERROR( + CreateNodeDef(out_const_name, TensorValue(&value), out_node)); + node_map_->AddNode(out_const_name, out_node); + out_node->set_device(node->device()); + + // Copy all inputs from node. + out_node->mutable_input()->CopyFrom(node->input()); + for (const string& input : out_node->input()) { + node_map_->AddOutput(NodeName(input), out_const_name); + } + + // Update output nodes consuming node:index to new const node. 
+ string old_input = absl::StrCat(node->name(), ":", index); + int old_node_count = 0; + auto outputs = node_map_->GetOutputs(node->name()); + for (const auto& output : outputs) { + for (int i = 0; i < output->input_size(); ++i) { + if (output->input(i) == old_input) { + output->set_input(i, out_const_name); + node_map_->AddOutput(out_const_name, output->name()); + } else if (NodeName(output->input(i)) == node->name()) { + ++old_node_count; + } + } + if (old_node_count == 0) { + node_map_->RemoveOutput(node->name(), output->name()); + } + } + + return Status::OK(); + }; + const string min_out_const_name = + OptimizedNodeName(*node, "-quantized_matmul_min_out"); + const string max_out_const_name = + OptimizedNodeName(*node, "-quantized_matmul_max_out"); + if (node_map_->GetNode(min_out_const_name) == nullptr && + node_map_->GetNode(max_out_const_name) == nullptr) { + TF_RETURN_IF_ERROR(add_quantized_out(min_out_const_name, 1)); + TF_RETURN_IF_ERROR(add_quantized_out(max_out_const_name, 2)); + } else { + return errors::Internal(absl::Substitute( + "Can't create Const for QuantizedMatMul min_out/max_out of " + "node '$0' because of node name conflict", + node->name())); + } + return Status::OK(); +} + Status ConstantFolding::RunOptimizationPass(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 7cf01b4b62..418176c893 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -236,6 +236,9 @@ class ConstantFolding : public GraphOptimizer { bool MergeConcat(const GraphProperties& properties, bool use_shape_info, GraphDef* optimized_graph, NodeDef* node); + Status AddQuantizedMatMulMinMaxOutConstNodes(NodeDef* node, + GraphDef* optimized_graph); + // Points to an externally provided device or to owned_device_; RewriterConfig::Toggle opt_level_; 
DeviceBase* cpu_device_; diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 375c3e56c8..7d4dfb0520 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -40,7 +40,7 @@ namespace tensorflow { namespace grappler { namespace { template -bool SafeSetScalarTensorValue(double value, Tensor* tensor) { +bool SafeSetDoubleScalarTensorValue(double value, Tensor* tensor) { using RealType = typename Eigen::NumTraits::Real; if (value > static_cast(Eigen::NumTraits::highest()) || value < static_cast(Eigen::NumTraits::lowest())) { @@ -50,6 +50,17 @@ bool SafeSetScalarTensorValue(double value, Tensor* tensor) { return true; } +template +bool SafeSetIntScalarTensorValue(int value, Tensor* tensor) { + using RealType = typename Eigen::NumTraits::Real; + if (value > static_cast(Eigen::NumTraits::highest()) || + value < static_cast(Eigen::NumTraits::lowest())) { + return false; + } + tensor->flat()(0) = static_cast(value); + return true; +} + // Is 'node' an operator that consumes only the shape of its input, not the // data itself? // TODO(ezhulenev): move to op_types.h. Requires to break circular dependency. 
@@ -410,35 +421,50 @@ void EraseNodesFromGraph(const std::set& nodes_to_delete, EraseNodesFromGraphImpl(nodes_idx_to_delete, graph); } -#define HANDLE_CASE(DTYPE) \ - case DTYPE: \ - if (!SafeSetScalarTensorValue::Type>( \ - static_cast(value), tensor)) { \ - return errors::InvalidArgument("Cannot store value ", value, \ - " in tensor of type " #DTYPE); \ - } \ +#define HANDLE_DOUBLE_CASE(DTYPE) \ + case DTYPE: \ + if (!SafeSetDoubleScalarTensorValue::Type>( \ + static_cast(value), tensor)) { \ + return errors::InvalidArgument("Cannot store value ", value, \ + " in tensor of type " #DTYPE); \ + } \ + break + +#define HANDLE_INT_CASE(DTYPE) \ + case DTYPE: \ + if (!SafeSetIntScalarTensorValue::Type>(value, \ + tensor)) { \ + return errors::InvalidArgument("Cannot store value ", value, \ + " in tensor of type " #DTYPE); \ + } \ break Status SetTensorValue(DataType dtype, int value, Tensor* tensor) { // TODO(rmlarsen): Support more general shapes. + // TODO(lyandy): Change `value` to be int64 once int64 -> qint32 is supported. 
if (tensor->NumElements() != 1) { return errors::InvalidArgument( "Expected scalar tensor, got num_elements = ", tensor->NumElements()); } switch (dtype) { - HANDLE_CASE(DT_HALF); - HANDLE_CASE(DT_BFLOAT16); - HANDLE_CASE(DT_BOOL); - HANDLE_CASE(DT_FLOAT); - HANDLE_CASE(DT_DOUBLE); - HANDLE_CASE(DT_UINT8); - HANDLE_CASE(DT_INT8); - HANDLE_CASE(DT_UINT16); - HANDLE_CASE(DT_INT16); - HANDLE_CASE(DT_INT32); - HANDLE_CASE(DT_INT64); - HANDLE_CASE(DT_COMPLEX64); - HANDLE_CASE(DT_COMPLEX128); + HANDLE_DOUBLE_CASE(DT_HALF); + HANDLE_DOUBLE_CASE(DT_BFLOAT16); + HANDLE_DOUBLE_CASE(DT_BOOL); + HANDLE_DOUBLE_CASE(DT_FLOAT); + HANDLE_DOUBLE_CASE(DT_DOUBLE); + HANDLE_DOUBLE_CASE(DT_UINT8); + HANDLE_DOUBLE_CASE(DT_INT8); + HANDLE_DOUBLE_CASE(DT_UINT16); + HANDLE_DOUBLE_CASE(DT_INT16); + HANDLE_DOUBLE_CASE(DT_INT32); + HANDLE_DOUBLE_CASE(DT_INT64); + HANDLE_DOUBLE_CASE(DT_COMPLEX64); + HANDLE_DOUBLE_CASE(DT_COMPLEX128); + HANDLE_INT_CASE(DT_QINT8); + HANDLE_INT_CASE(DT_QUINT8); + HANDLE_INT_CASE(DT_QINT16); + HANDLE_INT_CASE(DT_QUINT16); + HANDLE_INT_CASE(DT_QINT32); default: return errors::InvalidArgument("Unsupported type ", DataTypeString(dtype)); diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc index f5ae39867a..e30b1c5b73 100644 --- a/tensorflow/core/grappler/utils_test.cc +++ b/tensorflow/core/grappler/utils_test.cc @@ -18,6 +18,8 @@ limitations under the License. 
#include #include #include + +#include "absl/strings/substitute.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -124,56 +126,56 @@ class UtilsTest : public ::testing::Test { }; TEST_F(UtilsTest, NodeName) { - EXPECT_EQ("abc", NodeName("abc")); - EXPECT_EQ("abc", NodeName("^abc")); - EXPECT_EQ("abc", NodeName("abc:0")); - EXPECT_EQ("abc", NodeName("^abc:0")); - - EXPECT_EQ("abc/def", NodeName("abc/def")); - EXPECT_EQ("abc/def", NodeName("^abc/def")); - EXPECT_EQ("abc/def", NodeName("abc/def:1")); - EXPECT_EQ("abc/def", NodeName("^abc/def:1")); - - EXPECT_EQ("abc/def0", NodeName("abc/def0")); - EXPECT_EQ("abc/def0", NodeName("^abc/def0")); - EXPECT_EQ("abc/def0", NodeName("abc/def0:0")); - EXPECT_EQ("abc/def0", NodeName("^abc/def0:0")); - - EXPECT_EQ("abc/def_0", NodeName("abc/def_0")); - EXPECT_EQ("abc/def_0", NodeName("^abc/def_0")); - EXPECT_EQ("abc/def_0", NodeName("abc/def_0:3")); - EXPECT_EQ("abc/def_0", NodeName("^abc/def_0:3")); - - EXPECT_EQ("abc/def_0", NodeName("^abc/def_0:3214")); + EXPECT_EQ(NodeName("abc"), "abc"); + EXPECT_EQ(NodeName("^abc"), "abc"); + EXPECT_EQ(NodeName("abc:0"), "abc"); + EXPECT_EQ(NodeName("^abc:0"), "abc"); + + EXPECT_EQ(NodeName("abc/def"), "abc/def"); + EXPECT_EQ(NodeName("^abc/def"), "abc/def"); + EXPECT_EQ(NodeName("abc/def:1"), "abc/def"); + EXPECT_EQ(NodeName("^abc/def:1"), "abc/def"); + + EXPECT_EQ(NodeName("abc/def0"), "abc/def0"); + EXPECT_EQ(NodeName("^abc/def0"), "abc/def0"); + EXPECT_EQ(NodeName("abc/def0:0"), "abc/def0"); + EXPECT_EQ(NodeName("^abc/def0:0"), "abc/def0"); + + EXPECT_EQ(NodeName("abc/def_0"), "abc/def_0"); + EXPECT_EQ(NodeName("^abc/def_0"), "abc/def_0"); + EXPECT_EQ(NodeName("abc/def_0:3"), "abc/def_0"); + EXPECT_EQ(NodeName("^abc/def_0:3"), "abc/def_0"); + + EXPECT_EQ(NodeName("^abc/def_0:3214"), "abc/def_0"); } TEST_F(UtilsTest, NodePosition) { - EXPECT_EQ(2, NodePosition("abc:2")); - 
EXPECT_EQ(123, NodePosition("abc:123")); - EXPECT_EQ(-1, NodePosition("^abc:123")); - EXPECT_EQ(-1, NodePosition("^abc")); - EXPECT_EQ(0, NodePosition("")); + EXPECT_EQ(NodePosition("abc:2"), 2); + EXPECT_EQ(NodePosition("abc:123"), 123); + EXPECT_EQ(NodePosition("^abc:123"), -1); + EXPECT_EQ(NodePosition("^abc"), -1); + EXPECT_EQ(NodePosition(""), 0); } TEST_F(UtilsTest, NodePositionIfSameNode) { - EXPECT_EQ(-2, NodePositionIfSameNode(":123", "")); - EXPECT_EQ(-2, NodePositionIfSameNode(":", "")); - EXPECT_EQ(-2, NodePositionIfSameNode("", "")); - EXPECT_EQ(123, NodePositionIfSameNode("abc:123", "abc")); - EXPECT_EQ(-1, NodePositionIfSameNode("^abc", "abc")); - EXPECT_EQ(-1, NodePositionIfSameNode("^abc:123", "abc")); - EXPECT_EQ(-2, NodePositionIfSameNode("abc", "xyz")); - EXPECT_EQ(-2, NodePositionIfSameNode("abc", "abc/xyz")); - EXPECT_EQ(-2, NodePositionIfSameNode("abc/xyz", "abc")); - EXPECT_EQ(-2, NodePositionIfSameNode("abc:123", "xyz")); - EXPECT_EQ(-2, NodePositionIfSameNode("^abc", "xyz")); - EXPECT_EQ(-2, NodePositionIfSameNode("^abc:123", "xyz")); + EXPECT_EQ(NodePositionIfSameNode(":123", ""), -2); + EXPECT_EQ(NodePositionIfSameNode(":", ""), -2); + EXPECT_EQ(NodePositionIfSameNode("", ""), -2); + EXPECT_EQ(NodePositionIfSameNode("abc:123", "abc"), 123); + EXPECT_EQ(NodePositionIfSameNode("^abc", "abc"), -1); + EXPECT_EQ(NodePositionIfSameNode("^abc:123", "abc"), -1); + EXPECT_EQ(NodePositionIfSameNode("abc", "xyz"), -2); + EXPECT_EQ(NodePositionIfSameNode("abc", "abc/xyz"), -2); + EXPECT_EQ(NodePositionIfSameNode("abc/xyz", "abc"), -2); + EXPECT_EQ(NodePositionIfSameNode("abc:123", "xyz"), -2); + EXPECT_EQ(NodePositionIfSameNode("^abc", "xyz"), -2); + EXPECT_EQ(NodePositionIfSameNode("^abc:123", "xyz"), -2); } TEST_F(UtilsTest, AddNodeNamePrefix) { - EXPECT_EQ("OPTIMIZED/abc", AddPrefixToNodeName("abc", "OPTIMIZED")); - EXPECT_EQ("^OPTIMIZED/abc", AddPrefixToNodeName("^abc", "OPTIMIZED")); - EXPECT_EQ("OPTIMIZED/", AddPrefixToNodeName("", 
"OPTIMIZED")); + EXPECT_EQ(AddPrefixToNodeName("abc", "OPTIMIZED"), "OPTIMIZED/abc"); + EXPECT_EQ(AddPrefixToNodeName("^abc", "OPTIMIZED"), "^OPTIMIZED/abc"); + EXPECT_EQ(AddPrefixToNodeName("", "OPTIMIZED"), "OPTIMIZED/"); } TEST_F(UtilsTest, ExecuteWithTimeout) { @@ -204,17 +206,17 @@ TEST_F(UtilsTest, ExecuteWithTimeout) { TEST_F(UtilsTest, NumOutputs) { GraphDef graph; - EXPECT_EQ(2, NumOutputs(CreateConcatOffsetNode(), &graph)); - EXPECT_EQ(5, NumOutputs(CreateFusedBatchNormNode(), &graph)); - EXPECT_EQ(1, NumOutputs(CreateDequeueNode(), &graph)); + EXPECT_EQ(NumOutputs(CreateConcatOffsetNode(), &graph), 2); + EXPECT_EQ(NumOutputs(CreateFusedBatchNormNode(), &graph), 5); + EXPECT_EQ(NumOutputs(CreateDequeueNode(), &graph), 1); } TEST_F(UtilsTest, AsControlDependency) { NodeDef node; node.set_name("foo"); - EXPECT_EQ("^foo", AsControlDependency(node)); - EXPECT_EQ("^foo", AsControlDependency(node.name())); - EXPECT_EQ("^foo", AsControlDependency("^foo")); + EXPECT_EQ(AsControlDependency(node), "^foo"); + EXPECT_EQ(AsControlDependency(node.name()), "^foo"); + EXPECT_EQ(AsControlDependency("^foo"), "^foo"); } TEST_F(UtilsTest, GetTailOfChain) { @@ -233,22 +235,23 @@ TEST_F(UtilsTest, GetTailOfChain) { GraphDef graph; TF_CHECK_OK(s.ToGraphDef(&graph)); - ASSERT_EQ("c0", graph.node(0).name()); - ASSERT_EQ("c1", graph.node(1).name()); - ASSERT_EQ("neg0", graph.node(2).name()); - ASSERT_EQ("neg1", graph.node(3).name()); - ASSERT_EQ("neg2", graph.node(4).name()); - ASSERT_EQ("id1", graph.node(5).name()); - ASSERT_EQ("id2", graph.node(6).name()); - ASSERT_EQ("noop", graph.node(7).name()); + ASSERT_EQ(graph.node_size(), 8); + ASSERT_EQ(graph.node(0).name(), "c0"); + ASSERT_EQ(graph.node(1).name(), "c1"); + ASSERT_EQ(graph.node(2).name(), "neg0"); + ASSERT_EQ(graph.node(3).name(), "neg1"); + ASSERT_EQ(graph.node(4).name(), "neg2"); + ASSERT_EQ(graph.node(5).name(), "id1"); + ASSERT_EQ(graph.node(6).name(), "id2"); + ASSERT_EQ(graph.node(7).name(), "noop"); NodeMap 
node_map(&graph); auto is_neg = [&](const NodeDef& node) { return node.op() == "Neg"; }; // We walk backwards, starting as "id1", so tail should be "neg1". NodeDef* tail = GetTailOfChain(graph.node(5), node_map, /*follow_control_input=*/false, is_neg); - EXPECT_NE(tail, nullptr); - EXPECT_EQ("neg1", tail->name()); + ASSERT_NE(tail, nullptr); + EXPECT_EQ(tail->name(), "neg1"); // We stop at branching nodes, so tail should be "neg2". auto is_neg_and_non_branching = [&](const NodeDef& node) { @@ -257,22 +260,22 @@ TEST_F(UtilsTest, GetTailOfChain) { tail = GetTailOfChain(graph.node(5), node_map, /*follow_control_input=*/false, is_neg_and_non_branching); - EXPECT_NE(tail, nullptr); - EXPECT_EQ("neg2", tail->name()); + ASSERT_NE(tail, nullptr); + EXPECT_EQ(tail->name(), "neg2"); // We walk backwards, starting from "noop", also following control inputs, // so tail should be "neg0". tail = GetTailOfChain(graph.node(7), node_map, /*follow_control_input=*/true, is_neg); - EXPECT_NE(tail, nullptr); - EXPECT_EQ("neg0", tail->name()); + ASSERT_NE(tail, nullptr); + EXPECT_EQ(tail->name(), "neg0"); // We walk backwards, starting from "noop", not following control inputs, // so tail should be "noop" itself. 
tail = GetTailOfChain(graph.node(7), node_map, /*follow_control_input=*/false, is_neg); - EXPECT_NE(tail, nullptr); - EXPECT_EQ("noop", tail->name()); + ASSERT_NE(tail, nullptr); + EXPECT_EQ(tail->name(), "noop"); } TEST_F(UtilsTest, DedupControlInputs) { @@ -280,40 +283,40 @@ TEST_F(UtilsTest, DedupControlInputs) { foo.set_name("foo"); foo.add_input("bar"); DedupControlInputs(&foo); - EXPECT_EQ(1, foo.input_size()); - EXPECT_EQ("bar", foo.input(0)); + ASSERT_EQ(foo.input_size(), 1); + EXPECT_EQ(foo.input(0), "bar"); foo.set_input(0, "^bar"); DedupControlInputs(&foo); - EXPECT_EQ(1, foo.input_size()); - EXPECT_EQ("^bar", foo.input(0)); + ASSERT_EQ(foo.input_size(), 1); + EXPECT_EQ(foo.input(0), "^bar"); foo.set_input(0, "bar"); foo.add_input("bar"); DedupControlInputs(&foo); - EXPECT_EQ(2, foo.input_size()); - EXPECT_EQ("bar", foo.input(0)); - EXPECT_EQ("bar", foo.input(1)); + ASSERT_EQ(foo.input_size(), 2); + EXPECT_EQ(foo.input(0), "bar"); + EXPECT_EQ(foo.input(1), "bar"); foo.set_input(1, "^bar"); DedupControlInputs(&foo); - EXPECT_EQ(1, foo.input_size()); - EXPECT_EQ("bar", foo.input(0)); + ASSERT_EQ(foo.input_size(), 1); + EXPECT_EQ(foo.input(0), "bar"); foo.set_input(0, "^bar"); foo.add_input("^bar"); DedupControlInputs(&foo); - EXPECT_EQ(1, foo.input_size()); - EXPECT_EQ("^bar", foo.input(0)); + ASSERT_EQ(foo.input_size(), 1); + EXPECT_EQ(foo.input(0), "^bar"); foo.set_input(0, "bar"); foo.add_input("gnu"); foo.add_input("^bar"); foo.add_input("^gnu"); DedupControlInputs(&foo); - EXPECT_EQ(2, foo.input_size()); - EXPECT_EQ("bar", foo.input(0)); - EXPECT_EQ("gnu", foo.input(1)); + ASSERT_EQ(foo.input_size(), 2); + EXPECT_EQ(foo.input(0), "bar"); + EXPECT_EQ(foo.input(1), "gnu"); } TEST_F(UtilsTest, NumNonControlOutputs) { @@ -347,14 +350,14 @@ TEST_F(UtilsTest, NumNonControlOutputs) { NodeMap node_map(&graph); const NodeDef* add_node = node_map.GetNode("add"); - ASSERT_TRUE(add_node != nullptr); + ASSERT_NE(add_node, nullptr); // [a, b] are only non-control 
inputs - EXPECT_EQ(2, NumNonControlInputs(*add_node)); + EXPECT_EQ(NumNonControlInputs(*add_node), 2); // [sqrt, shape] are non control outputs - EXPECT_EQ(2, NumNonControlOutputs(*add_node, node_map)); + EXPECT_EQ(NumNonControlOutputs(*add_node, node_map), 2); // sqrt is the only data output - EXPECT_EQ(1, NumNonControlDataOutputs(*add_node, node_map)); + EXPECT_EQ(NumNonControlDataOutputs(*add_node, node_map), 1); } TEST(CheckAttrExists, All) { @@ -465,10 +468,104 @@ TEST_F(UtilsTest, SetTensorValueBFloat16IntMin) { } TEST_F(UtilsTest, TensorIdToString) { - EXPECT_EQ("^foo", TensorIdToString({"foo", -1})); - EXPECT_EQ("foo", TensorIdToString({"foo", 0})); - EXPECT_EQ("foo:1", TensorIdToString({"foo", 1})); - EXPECT_EQ("foo:2", TensorIdToString({"foo", 2})); + EXPECT_EQ(TensorIdToString({"foo", -1}), "^foo"); + EXPECT_EQ(TensorIdToString({"foo", 0}), "foo"); + EXPECT_EQ(TensorIdToString({"foo", 1}), "foo:1"); + EXPECT_EQ(TensorIdToString({"foo", 2}), "foo:2"); +} + +template +void TestSetTensorValue(DataType type, int val, bool success, + absl::string_view error_msg) { + Tensor t(type, TensorShape({})); + Status s = SetTensorValue(t.dtype(), val, &t); + EXPECT_EQ(s.ok(), success); + if (s.ok()) { + test::ExpectTensorEqual(Tensor(static_cast(val)), t); + } else { + EXPECT_EQ(s.error_message(), error_msg); + } +} + +TEST(SetTensorValueTest, Quantized) { + auto int_min_error = [](DataType type) { + return absl::Substitute( + "Cannot store value -2147483648 in tensor of type $0", + DataType_Name(type)); + }; + auto int_max_error = [](DataType type) { + return absl::Substitute( + "Cannot store value 2147483647 in tensor of type $0", + DataType_Name(type)); + }; + const int kMinInt = std::numeric_limits::min(); + const int kMaxInt = std::numeric_limits::max(); + + TestSetTensorValue(DT_QINT8, -8, /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT8, 0, /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT8, 8, /*success=*/true, 
/*error_msg=*/""); + TestSetTensorValue(DT_QINT8, std::numeric_limits::min(), + /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT8, std::numeric_limits::max(), + /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT8, kMinInt, /*success=*/false, + int_min_error(DT_QINT8)); + TestSetTensorValue(DT_QINT8, kMaxInt, /*success=*/false, + int_max_error(DT_QINT8)); + + TestSetTensorValue( + DT_QUINT8, -8, /*success=*/false, + /*error_msg=*/"Cannot store value -8 in tensor of type DT_QUINT8"); + TestSetTensorValue(DT_QUINT8, 0, /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QUINT8, 8, /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QUINT8, std::numeric_limits::min(), + /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QUINT8, std::numeric_limits::max(), + /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QUINT8, kMinInt, /*success=*/false, + int_min_error(DT_QUINT8)); + TestSetTensorValue(DT_QUINT8, kMaxInt, /*success=*/false, + int_max_error(DT_QUINT8)); + + TestSetTensorValue(DT_QINT16, -8, /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT16, 0, /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT16, 8, /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT16, std::numeric_limits::min(), + /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT16, std::numeric_limits::max(), + /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT16, kMinInt, /*success=*/false, + int_min_error(DT_QINT16)); + TestSetTensorValue(DT_QINT16, kMaxInt, /*success=*/false, + int_max_error(DT_QINT16)); + + TestSetTensorValue( + DT_QUINT16, -8, /*success=*/false, + /*error_msg=*/"Cannot store value -8 in tensor of type DT_QUINT16"); + TestSetTensorValue(DT_QUINT16, 0, /*success=*/true, + /*error_msg=*/""); + TestSetTensorValue(DT_QUINT16, 8, /*success=*/true, + /*error_msg=*/""); + TestSetTensorValue(DT_QUINT16, std::numeric_limits::min(), + /*success=*/true, 
/*error_msg=*/""); + TestSetTensorValue(DT_QUINT16, std::numeric_limits::max(), + /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QUINT16, kMinInt, /*success=*/false, + int_min_error(DT_QUINT16)); + TestSetTensorValue(DT_QUINT16, kMaxInt, /*success=*/false, + int_max_error(DT_QUINT16)); + + TestSetTensorValue(DT_QINT32, -8, /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT32, 0, /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT32, 8, /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT32, std::numeric_limits::min(), + /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT32, std::numeric_limits::max(), + /*success=*/true, /*error_msg=*/""); + TestSetTensorValue(DT_QINT32, kMinInt, /*success=*/true, + /*error_msg=*/""); + TestSetTensorValue(DT_QINT32, kMaxInt, /*success=*/true, + /*error_msg=*/""); } } // namespace -- GitLab From dc7c7f7a588697d711b5ae579c355dfb8964692e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 14 Feb 2019 20:00:25 -0800 Subject: [PATCH 179/351] Split py_binary into py_binary and py_library to avoid having py_binary in deps. 
PiperOrigin-RevId: 234075407 --- tensorflow/lite/schema/BUILD | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/schema/BUILD b/tensorflow/lite/schema/BUILD index ea516764c9..e55419186e 100644 --- a/tensorflow/lite/schema/BUILD +++ b/tensorflow/lite/schema/BUILD @@ -9,6 +9,12 @@ load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite") py_binary( name = "upgrade_schema", + srcs = ["upgrade_schema.py"], + deps = [":upgrade_schema_main_lib"], +) + +py_library( + name = "upgrade_schema_main_lib", srcs = [ "upgrade_schema.py", ], @@ -39,7 +45,7 @@ py_test( "notap", ], deps = [ - ":upgrade_schema", + ":upgrade_schema_main_lib", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_test_lib", ], -- GitLab From 40cd7f905720d52ed4643ae9eaac2ef2375b1408 Mon Sep 17 00:00:00 2001 From: Blake Hechtman Date: Thu, 14 Feb 2019 20:07:18 -0800 Subject: [PATCH 180/351] [XLA] simplify ShapeUtil::Alignlayouts by removing degenerate dimensions. [TF2XLA] Use iota instruction instead of constant literal. 
PiperOrigin-RevId: 234076260 --- tensorflow/compiler/tf2xla/xla_helpers.cc | 44 +++------- .../compiler/xla/service/shape_inference.cc | 3 +- tensorflow/compiler/xla/shape_util.cc | 80 +++++++++++-------- tensorflow/compiler/xla/shape_util_test.cc | 11 ++- 4 files changed, 66 insertions(+), 72 deletions(-) diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index 04a5d93406..7ae903e14f 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -95,47 +95,23 @@ Status XlaHelpers::OneHot(xla::XlaBuilder* builder, int64 depth, int axis, DataType index_type, const TensorShape& indices_shape, const xla::XlaOp& indices, const xla::XlaOp& on_value, const xla::XlaOp& off_value, xla::XlaOp* one_hot) { - const int indices_dims = indices_shape.dims(); - const int output_dims = indices_dims + 1; - - TensorShape output_shape = indices_shape; - output_shape.InsertDim(axis, depth); - - // Build a Tensor populated with values 0, 1, 2, ... depth. - std::vector linspace_dims(output_dims, 1); - linspace_dims[axis] = depth; - TensorShape linspace_shape(linspace_dims); - Tensor linspace; - switch (index_type) { - case DT_UINT8: - linspace = MakeLinspaceTensor(linspace_shape, depth); - break; - case DT_INT32: - linspace = MakeLinspaceTensor(linspace_shape, depth); - break; - case DT_INT64: - linspace = MakeLinspaceTensor(linspace_shape, depth); - break; - default: - return errors::InvalidArgument("Invalid argument type ", - DataTypeString(index_type)); - } - - xla::BorrowingLiteral linspace_literal; - TF_RETURN_IF_ERROR(HostTensorToBorrowingLiteral(linspace, &linspace_literal)); - // Broadcast the linspace constant across the indices along the new axis, // and test equality at each position. 
std::vector broadcast_dims(indices_shape.dims()); std::iota(broadcast_dims.begin(), broadcast_dims.begin() + axis, 0); std::iota(broadcast_dims.begin() + axis, broadcast_dims.end(), axis + 1); - xla::XlaOp one_hot_bool = xla::Eq( - indices, xla::ConstantLiteral(builder, linspace_literal), broadcast_dims); + + TensorShape output_shape = indices_shape; + output_shape.InsertDim(axis, depth); + xla::Shape iota_shape; + TF_RETURN_IF_ERROR( + TensorShapeToXLAShape(index_type, output_shape, &iota_shape)); // Selects the user-provided off_value and on_value values. - *one_hot = xla::Select(one_hot_bool, - xla::Broadcast(on_value, output_shape.dim_sizes()), - xla::Broadcast(off_value, output_shape.dim_sizes())); + *one_hot = xla::Select( + xla::Eq(indices, xla::Iota(builder, iota_shape, axis), broadcast_dims), + xla::Broadcast(on_value, output_shape.dim_sizes()), + xla::Broadcast(off_value, output_shape.dim_sizes())); return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index a570ee346d..3f4456c1bb 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -836,7 +836,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, ShapeUtil::HumanString(larger_shape)); } if (small_is_dynamic != large_is_dynamic) { - if ((small_dimension_size == 1 && !small_is_dynamic) || + if (small_dimension_size == large_dimension_size || + (small_dimension_size == 1 && !small_is_dynamic) || (large_dimension_size == 1 && !large_is_dynamic)) { // Do nothing. It's OK when the size-1 dimension is not static. } else { diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index e6273c4e7f..d045fc7a9e 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include #include +#include "absl/container/inlined_vector.h" #include "absl/strings/ascii.h" #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" @@ -1256,6 +1257,43 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, const Shape& input_shape, const Shape& output_shape) { CHECK(input_shape.IsArray()); CHECK(output_shape.IsArray()); + // Removing trivial dimensions from the shape simplifies the alignment + // algorithm since ones can go in any position. + if (HasDegenerateDimensions(input_shape) || + HasDegenerateDimensions(output_shape)) { + auto simple_output_shape = + AlignLayouts(DropDegenerateDimensions(input_shape), + DropDegenerateDimensions(output_shape)); + if (!simple_output_shape) { + return absl::nullopt; + } + + auto layout = simple_output_shape->layout().minor_to_major(); + // For each one sized dimension in the output, increment the dimension + // numbers in layout that are more minor than the one. + absl::InlinedVector dim_map; + dim_map.reserve(simple_output_shape->rank()); + for (int64 i = 0; i < output_shape.rank(); ++i) { + if (output_shape.dimensions(i) != 1) { + dim_map.push_back(i); + } + } + for (int64& d : layout) { + d = dim_map[d]; + } + + // Add the ones in descending order to the layout. Descending layouts tend + // to reduce the number of copies inserted in layout assignment. 
+ for (int64 i = output_shape.rank() - 1; i >= 0; --i) { + if (output_shape.dimensions(i) == 1) { + layout.push_back(i); + } + } + Shape output_shape_with_layout = output_shape; + *output_shape_with_layout.mutable_layout()->mutable_minor_to_major() = + layout; + return output_shape_with_layout; + } int64 input_rank = input_shape.rank(); int64 output_rank = output_shape.rank(); @@ -1304,10 +1342,10 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, if (input_dimension_product != output_dimension_product) { return absl::nullopt; } + // We also need to store an end element so that we know where the last // alignment part ends. alignment.push_back({input_rank, output_rank}); - // Now check if the physical layout can potentially be aligned to the output // shape by changing the physical layout of the output shape. We need to check // that all dimension numbers that belong to the same alignment part appear @@ -1319,40 +1357,23 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, for (int64 i = 0; i < input_rank;) { int64 current_dimension_number = input_dimension_numbers[i]; - // Skip trivial dimensions with a bound of 1. - if (input_shape.dimensions(current_dimension_number) == 1) { - ++i; - continue; - } - - // Calculate the number of non-trivial dimension bounds in the input shape - // belonging to the current alignment part. + // Trivial dimensions are stripped. + CHECK_NE(input_shape.dimensions(current_dimension_number), 1); const int64 current_alignment_index = dimension_to_alignment_index[current_dimension_number]; // Because of the special end element that we added, we can be sure that // 'current_alignment_index' is < alignment.size() - 1. 
CHECK_LT(current_alignment_index, alignment.size() - 1); - int64 num_non_trivial_dimensions_in_alignment_part = 0; - for (int64 j = alignment[current_alignment_index].first; - j < alignment[current_alignment_index + 1].first; ++j) { - if (input_shape.dimensions(j) != 1) { - ++num_non_trivial_dimensions_in_alignment_part; - } - } // Check that the following 'num_non_trivial_dimensions_in_alignment_part' // dimension numbers (ignoring dimension numbers with dimension bound 1) are // in descending order and belong to the current alignment part. - for (int64 j = 0; j < num_non_trivial_dimensions_in_alignment_part; + for (int64 j = 0; j < alignment[current_alignment_index + 1].first - + alignment[current_alignment_index].first; ++i, ++j) { if (i == input_rank) { return absl::nullopt; } - // Skip trivial dimensions with a bound of 1. - if (input_shape.dimensions(input_dimension_numbers[i]) == 1) { - --j; - continue; - } // If the current dimension number belongs to a different alignment part, // or the dimension numbers are not in descending order, we can return // early. @@ -1363,22 +1384,11 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, } current_dimension_number = input_dimension_numbers[i]; } - // The output dimension numbers that belong to the current alignment part - // need to appear in the same descending order as in the input. Again, we - // can skip dimensions with a bound of 1. + // need to appear in the same descending order as in the input. for (int64 j = alignment[current_alignment_index + 1].second - 1; j >= alignment[current_alignment_index].second; --j) { - if (output_shape.dimensions(j) != 1) { - output_layout.push_back(j); - } - } - } - // Now add all the dimensions with dimension bound 1 at the end of - // 'output_layout'. 
- for (int64 i = 0; i < output_rank; ++i) { - if (output_shape.dimensions(i) == 1) { - output_layout.push_back(i); + output_layout.push_back(j); } } CHECK_EQ(output_layout.size(), output_rank); diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc index 126ae58293..020b062f6b 100644 --- a/tensorflow/compiler/xla/shape_util_test.cc +++ b/tensorflow/compiler/xla/shape_util_test.cc @@ -761,8 +761,15 @@ TEST(AlignmentTest, AlignLayoutsWithTrivialDimensions) { auto aligned_shape = ShapeUtil::AlignLayouts( input, ShapeUtil::MakeShape(xla::F32, {1, 4, 1, 3, 2, 7, 5, 11, 1})); EXPECT_TRUE(aligned_shape); - EXPECT_THAT(aligned_shape.value().layout().minor_to_major(), - ElementsAre(6, 5, 4, 3, 1, 7, 0, 2, 8)); + EXPECT_TRUE(ShapeUtil::ReshapeIsBitcast(input, aligned_shape.value())); +} + +TEST(AlignmentTest, AlignLayoutsWithAllTrivialDimensions) { + Shape input = + ShapeUtil::MakeShapeWithLayout(xla::F32, {1, 1, 1, 1}, {0, 1, 3, 2}); + auto aligned_shape = ShapeUtil::AlignLayouts( + input, ShapeUtil::MakeShape(xla::F32, {1, 1, 1, 1, 1})); + EXPECT_TRUE(aligned_shape); EXPECT_TRUE(ShapeUtil::ReshapeIsBitcast(input, aligned_shape.value())); } -- GitLab From 4938573f6b7053235d5963dc3ad7a313bab86abc Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Thu, 14 Feb 2019 20:47:37 -0800 Subject: [PATCH 181/351] Fix deprecation message. PiperOrigin-RevId: 234079207 --- tensorflow/lite/python/lite.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/lite/python/lite.py b/tensorflow/lite/python/lite.py index a05dc28f79..36eb0e298d 100644 --- a/tensorflow/lite/python/lite.py +++ b/tensorflow/lite/python/lite.py @@ -462,7 +462,8 @@ class TFLiteConverter(object): def __setattr__(self, name, value): if name == "post_training_quantize": warnings.warn("Property %s is deprecated, " - "please use set_converter_mode instead." % name) + "please use optimizations=[Optimize.OPTIMIZE_FOR_SIZE]" + " instead." 
% name) if value: # Use OPTIMIZE_FOR_SIZE for post training for now. self.optimizations = [Optimize.OPTIMIZE_FOR_SIZE] @@ -474,7 +475,8 @@ class TFLiteConverter(object): def __getattribute__(self, name): if name == "post_training_quantize": warnings.warn("Property %s is deprecated, " - "please use get_converter_mode instead." % name) + "please use optimizations=[Optimize.OPTIMIZE_FOR_SIZE]" + " instead." % name) return Optimize.OPTIMIZE_FOR_SIZE in set(self.optimizations) return object.__getattribute__(self, name) -- GitLab From e1ab41387a255fe4a98b76589cd36dc8206c7f77 Mon Sep 17 00:00:00 2001 From: Zhenyu Tan Date: Thu, 14 Feb 2019 21:37:24 -0800 Subject: [PATCH 182/351] Remove error raising in keras compile for optimizers. PiperOrigin-RevId: 234083146 --- tensorflow/python/keras/engine/training.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index e3a54426b8..9fa46904ae 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -41,7 +41,6 @@ from tensorflow.python.keras.engine import training_eager from tensorflow.python.keras.engine import training_generator from tensorflow.python.keras.engine import training_utils from tensorflow.python.keras.engine.network import Network -from tensorflow.python.keras.optimizer_v2 import optimizer_v2 from tensorflow.python.keras.saving import saving_utils from tensorflow.python.keras.utils import data_utils from tensorflow.python.keras.utils import losses_utils @@ -49,7 +48,6 @@ from tensorflow.python.keras.utils.generic_utils import slice_arrays from tensorflow.python.keras.utils.mode_keys import ModeKeys from tensorflow.python.ops import math_ops from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training import optimizer as tf_optimizer_module from tensorflow.python.training.checkpointable import base as checkpointable from 
tensorflow.python.util import nest from tensorflow.python.util.tf_export import keras_export @@ -233,12 +231,6 @@ class Model(Network): # Validate that arguments passed by the user to `compile` are supported by # DistributionStrategy. if self._distribution_strategy: - if not isinstance(optimizer, - (tf_optimizer_module.Optimizer, optimizers.TFOptimizer, - optimizer_v2.OptimizerV2)): - raise NotImplementedError( - 'optimizer must be an instance of ' - 'tf.train.Optimizer, not a %s' % type(optimizer)) if sample_weight_mode: raise NotImplementedError('sample_weight_mode is not supported with ' 'DistributionStrategy.') @@ -250,13 +242,6 @@ class Model(Network): 'DistributionStrategy.') loss = loss or {} - if self.run_eagerly and not isinstance( - optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer, - optimizer_v2.OptimizerV2)): - raise ValueError( - 'When running a model in eager execution, the optimizer must be an ' - 'instance of tf.train.Optimizer. Received: ' - '%s' % optimizer) self.optimizer = optimizer # We've disabled automatic dependency tracking for this method, but do want -- GitLab From c442884f8bbe82a02085bf159b3b13dc06f83d0d Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 14 Feb 2019 22:56:15 -0800 Subject: [PATCH 183/351] Fix issue in which the default learning phase was incorrectly captured when using a tf.function in a subclassed Layer or Model. 
PiperOrigin-RevId: 234088875 --- tensorflow/python/keras/engine/base_layer.py | 12 +++ .../python/keras/engine/base_layer_test.py | 87 ++++++++++++++----- .../python/keras/engine/base_layer_utils.py | 8 ++ tensorflow/python/keras/engine/network.py | 3 + 4 files changed, 89 insertions(+), 21 deletions(-) diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index a68230a2f3..fe86a4145f 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -561,6 +561,16 @@ class Layer(checkpointable.Checkpointable): # Build layer if applicable (if the `build` method has been # overridden). self._maybe_build(inputs) + # Explicitly pass the learning phase placeholder to `call` if + # the `training` argument was left unspecified by the user. + # This behavior is restricted to the managed Keras FuncGraph. + learning_phase_passed_by_framework = False + if (self._expects_training_arg and + not base_layer_utils.training_arg_passed_to_call( + tf_inspect.getfullargspec(self.call), args, kwargs) and + getattr(graph, 'name', None) == 'keras_graph'): + learning_phase_passed_by_framework = True + kwargs['training'] = backend.learning_phase() if not self.dynamic: try: outputs = self.call(inputs, *args, **kwargs) @@ -590,6 +600,8 @@ class Layer(checkpointable.Checkpointable): 'Tensor or a list of Tensors, not None ' '(layer: ' + self.name + ').') if base_layer_utils.have_all_keras_metadata(inputs): + if learning_phase_passed_by_framework: + kwargs.pop('training') inputs, outputs = self._set_connectivity_metadata_( inputs, outputs, args, kwargs) self._handle_activity_regularization(inputs, outputs) diff --git a/tensorflow/python/keras/engine/base_layer_test.py b/tensorflow/python/keras/engine/base_layer_test.py index 109fc1f25e..dc1fbca115 100644 --- a/tensorflow/python/keras/engine/base_layer_test.py +++ b/tensorflow/python/keras/engine/base_layer_test.py @@ -25,6 +25,7 @@ import numpy as 
np from tensorflow.python import keras from tensorflow.python.eager import context +from tensorflow.python.eager import def_function from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util @@ -32,6 +33,7 @@ from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine import base_layer from tensorflow.python.keras.optimizer_v2 import rmsprop +from tensorflow.python.keras.utils import tf_utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops @@ -279,6 +281,70 @@ class BaseLayerTest(keras_parameterized.TestCase): keras.backend.set_learning_phase(0) self.assertEqual(get_learning_phase_value(), 0) + # Cannot be enabled with `run_eagerly=True`, see b/123904578 + @test_util.run_all_in_graph_and_eager_modes + def test_layer_can_return_variable(self): + + class ComputeSum(keras.layers.Layer): + + def __init__(self): + super(ComputeSum, self).__init__() + self.total = variables.Variable( + initial_value=array_ops.zeros((1, 1)), trainable=False) + if not context.executing_eagerly(): + keras.backend.get_session().run(self.total.initializer) + + def call(self, inputs): + self.total.assign_add(inputs) + return self.total + + inputs = keras.Input(shape=(1,)) + model = keras.Model(inputs, ComputeSum()(inputs)) + model.predict(np.ones((1, 1))) + + def _get_layer_with_training_arg(self): + + class TrainingLayer(keras.layers.Layer): + """A layer with a `training` argument in a defuned `call`.""" + + @def_function.function + def call(self, inputs, training=None): + if training is None: + training = keras.backend.learning_phase() + return tf_utils.smart_cond(training, + lambda: array_ops.ones_like(inputs), + lambda: array_ops.zeros_like(inputs)) + + return TrainingLayer() + + @keras_parameterized.run_with_all_model_types + # 
b/124459427: can't test with `run_eagerly=True` for now. + @test_util.run_in_graph_and_eager_modes + def test_training_arg_in_defun(self): + layer = self._get_layer_with_training_arg() + model = testing_utils.get_model_from_layers([layer], input_shape=(1,)) + model.compile(rmsprop.RMSprop(0.), + loss='mae') + history = model.fit(np.zeros((1, 1)), np.zeros((1, 1))) + self.assertEqual(history.history['loss'][0], 1.) + loss = model.evaluate(np.zeros((1, 1)), np.zeros((1, 1))) + self.assertEqual(loss, 0.) + + # Test that the argument injection performed in `call` is not active + # when the argument is passed explicitly. + layer = self._get_layer_with_training_arg() + inputs = keras.Input(shape=(1,)) + # Pass `training` by name + outputs = layer(inputs, training=False) + model = keras.Model(inputs, outputs) + model.compile(rmsprop.RMSprop(0.), + loss='mae') + history = model.fit(np.zeros((1, 1)), np.zeros((1, 1))) + self.assertEqual(history.history['loss'][0], 0.) + + +class SymbolicSupportTest(test.TestCase): + def test_using_symbolic_tensors_with_tf_ops(self): # Single-input. 
x = keras.Input((3,)) @@ -369,27 +435,6 @@ class BaseLayerTest(keras_parameterized.TestCase): function_name = last_entry[2] self.assertEqual(function_name, 'easily_identifiable_name') - # Cannot be enabled with `run_eagerly=True`, see b/123904578 - @test_util.run_all_in_graph_and_eager_modes - def test_layer_can_return_variable(self): - - class ComputeSum(keras.layers.Layer): - - def __init__(self): - super(ComputeSum, self).__init__() - self.total = variables.Variable( - initial_value=array_ops.zeros((1, 1)), trainable=False) - if not context.executing_eagerly(): - keras.backend.get_session().run(self.total.initializer) - - def call(self, inputs): - self.total.assign_add(inputs) - return self.total - - inputs = keras.Input(shape=(1,)) - model = keras.Model(inputs, ComputeSum()(inputs)) - model.predict(np.ones((1, 1))) - @test_util.run_all_in_graph_and_eager_modes class NestedTrackingTest(test.TestCase): diff --git a/tensorflow/python/keras/engine/base_layer_utils.py b/tensorflow/python/keras/engine/base_layer_utils.py index 40ac121738..92123299be 100644 --- a/tensorflow/python/keras/engine/base_layer_utils.py +++ b/tensorflow/python/keras/engine/base_layer_utils.py @@ -382,3 +382,11 @@ def call_context(): yield finally: _call_context.in_call = was_in_call + + +def training_arg_passed_to_call(argspec, args, kwargs): + """Returns whether a user passed the `training` argument in `__call__`.""" + # `argspec.args` starts with ['self', 'inputs'] + full_args = dict(zip(argspec.args[2:], args)) + full_args.update(kwargs) + return 'training' in full_args diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index 0e1908a4a3..167cf18948 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/network.py @@ -238,6 +238,9 @@ class Network(base_layer.Layer): self._compute_output_and_mask_jointly = True self._is_graph_network = True self._dynamic = False + # `_expects_training_arg` is True since the 
`training` argument is always + # present in the signature of the `call` method of a graph network. + self._expects_training_arg = True self._input_layers = [] self._output_layers = [] -- GitLab From 0924626228465d4da23ced8a8050a337bb4279de Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Thu, 14 Feb 2019 23:35:53 -0800 Subject: [PATCH 184/351] Change struct initialization to fix compile on Windows. PiperOrigin-RevId: 234092192 --- tensorflow/lite/tools/optimize/node_info_delegate.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/lite/tools/optimize/node_info_delegate.cc b/tensorflow/lite/tools/optimize/node_info_delegate.cc index ccaa69373f..d4d005024b 100644 --- a/tensorflow/lite/tools/optimize/node_info_delegate.cc +++ b/tensorflow/lite/tools/optimize/node_info_delegate.cc @@ -33,11 +33,11 @@ TfLiteStatus NodeInfoDelegatePrepare(TfLiteContext* context, } // namespace TfLiteDelegate CreateNodeInfoDelegate(NodeInfoDelegateParams* params) { - return {.data_ = params, - .Prepare = NodeInfoDelegatePrepare, - .CopyFromBufferHandle = nullptr, - .CopyToBufferHandle = nullptr, - .FreeBufferHandle = nullptr}; + return {/*data_ */ params, + /* Prepare */ NodeInfoDelegatePrepare, + /* CopyFromBufferHandle*/ nullptr, + /* CopyToBufferHandle*/ nullptr, + /* FreeBufferHandle*/ nullptr}; } TfLiteStatus NodeInfoDelegateObserver::OnDelegatePrepareCalled( -- GitLab From 472695a0da668e571739b74d9799d2843caebe63 Mon Sep 17 00:00:00 2001 From: André Susano Pinto Date: Thu, 14 Feb 2019 23:46:14 -0800 Subject: [PATCH 185/351] Raise TypeError if an argument passed to a function with input_signature is not covered by it.
PiperOrigin-RevId: 234093173 --- tensorflow/python/eager/function.py | 15 +++++++++++++++ tensorflow/python/eager/function_test.py | 23 ++++++++++++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index d2c3cb824f..c284a0a2b1 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -949,6 +949,21 @@ class FunctionSpec(object): argument when an input signature is specified, or when the inputs do not conform to the input signature. """ + if self._input_signature is not None: + if len(args) > len(self._input_signature): + raise TypeError( + "When input_signature is provided, only pass arguments " + "covered by it. Received %d argument(s)." % len(args)) + for arg in six.iterkeys(kwargs): + index = self._args_to_indices.get(arg, None) + if index is None: + raise TypeError( + "Function got an unexpected keyword argument %s" % arg) + if index >= len(self._input_signature): + raise TypeError( + "When input_signature is provided, only pass arguments " + "covered by it. Received argument %s." % arg) + args = self._args_to_prepend + args kwargs = dict(kwargs, **self._kwargs_to_include) if not kwargs: diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 34f0b4479b..bd041d09ca 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1439,9 +1439,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): defined(array_ops.ones([2, 1])) # Wrong number of arguments. 
- with self.assertRaisesRegexp( - ValueError, - 'Arguments and signature arguments do not match.*'): + with self.assertRaisesRegexp(TypeError, 'Received 2 argument\(s\)'): defined(array_ops.ones([2]), array_ops.ones([2])) with self.assertRaisesRegexp(ValueError, 'Structure of Python function inputs.*'): @@ -1471,6 +1469,25 @@ class FunctionTest(test.TestCase, parameterized.TestCase): defined([a], [a, a, a]) defined([a, a], [a, a]) + def testUnderspecifiedInputSignature(self): + @function.defun(input_signature=[ + tensor_spec.TensorSpec([], dtypes.float32), + ]) + def foo(a, training=True): + if training: + return a + else: + return -1.0 * a + + x = constant_op.constant(1.0) + with self.assertRaisesRegexp(TypeError, 'only pass arguments'): + foo(x, training=True) + + with self.assertRaisesRegexp(TypeError, 'only pass arguments'): + foo(x, training=False) + + self.assertAllEqual(x.numpy(), foo(x).numpy()) + def testInputSignatureWithPartialFunction(self): self.skipTest('b/124441704') def full_function(a, b, c=3.0): -- GitLab From fe89c1b0e5376c15ebd040e9e69382e5f837f5d5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 00:12:33 -0800 Subject: [PATCH 186/351] Fix two issues in profiler: 1. copy profiler context when starting profiler server. Because the original profiler context may be deleted by user. 2. DCHECK will be removed in production. 
PiperOrigin-RevId: 234096598 --- .../profiler/rpc/client/capture_profile.cc | 3 +- .../core/profiler/rpc/profiler_server.cc | 28 ++++++++++--------- .../profiler/rpc/profiler_service_impl.cc | 6 ++-- .../core/profiler/rpc/profiler_service_impl.h | 2 +- 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/tensorflow/core/profiler/rpc/client/capture_profile.cc b/tensorflow/core/profiler/rpc/client/capture_profile.cc index a543111d9e..708ce5c47b 100644 --- a/tensorflow/core/profiler/rpc/client/capture_profile.cc +++ b/tensorflow/core/profiler/rpc/client/capture_profile.cc @@ -39,7 +39,8 @@ constexpr uint64 kMaxEvents = 1000000; string GetCurrentTimeStampAsString() { char s[128]; std::time_t t = std::time(nullptr); - DCHECK_NE(std::strftime(s, sizeof(s), "%F_%T", std::localtime(&t)), 0); + auto result = std::strftime(s, sizeof(s), "%F_%T", std::localtime(&t)); + DCHECK_NE(result, 0); return s; } diff --git a/tensorflow/core/profiler/rpc/profiler_server.cc b/tensorflow/core/profiler/rpc/profiler_server.cc index bef1e3c519..8d15068706 100644 --- a/tensorflow/core/profiler/rpc/profiler_server.cc +++ b/tensorflow/core/profiler/rpc/profiler_server.cc @@ -28,19 +28,21 @@ std::unique_ptr StartProfilerServer( Env* env = profiler_context->eager_context != nullptr ? 
profiler_context->eager_context->TFEnv() : Env::Default(); - return WrapUnique( - env->StartThread({}, "profiler server", [profiler_context, port]() { - string server_address = strings::StrCat("0.0.0.0:", port); - std::unique_ptr service = - CreateProfilerService(profiler_context); - ::grpc::ServerBuilder builder; - builder.AddListeningPort(server_address, - ::grpc::InsecureServerCredentials()); - builder.RegisterService(service.get()); - std::unique_ptr<::grpc::Server> server(builder.BuildAndStart()); - LOG(INFO) << "Profiling Server listening on " << server_address; - server->Wait(); - })); + // Starting the server in the child thread may be delay and user may already + // delete the profiler context at that point. So we need to make a copy. + ProfilerContext ctx = *profiler_context; + return WrapUnique(env->StartThread({}, "profiler server", [ctx, port]() { + string server_address = strings::StrCat("0.0.0.0:", port); + std::unique_ptr service = + CreateProfilerService(ctx); + ::grpc::ServerBuilder builder; + builder.AddListeningPort(server_address, + ::grpc::InsecureServerCredentials()); + builder.RegisterService(service.get()); + std::unique_ptr<::grpc::Server> server(builder.BuildAndStart()); + LOG(INFO) << "Profiling Server listening on " << server_address; + server->Wait(); + })); } } // namespace tensorflow diff --git a/tensorflow/core/profiler/rpc/profiler_service_impl.cc b/tensorflow/core/profiler/rpc/profiler_service_impl.cc index 8704d9b64c..9421e91539 100644 --- a/tensorflow/core/profiler/rpc/profiler_service_impl.cc +++ b/tensorflow/core/profiler/rpc/profiler_service_impl.cc @@ -26,8 +26,8 @@ namespace { // TODO(fishx): Rename TPUProfiler to something more generic. 
class ProfilerServiceImpl : public TPUProfiler::Service { public: - explicit ProfilerServiceImpl(ProfilerContext* const profiler_context) - : profiler_context_(*profiler_context) {} + explicit ProfilerServiceImpl(const ProfilerContext& profiler_context) + : profiler_context_(profiler_context) {} ~ProfilerServiceImpl() override {} ::grpc::Status Monitor(::grpc::ServerContext* ctx, const MonitorRequest* req, @@ -69,7 +69,7 @@ class ProfilerServiceImpl : public TPUProfiler::Service { } // namespace std::unique_ptr CreateProfilerService( - ProfilerContext* const profiler_context) { + const ProfilerContext& profiler_context) { return MakeUnique(profiler_context); } diff --git a/tensorflow/core/profiler/rpc/profiler_service_impl.h b/tensorflow/core/profiler/rpc/profiler_service_impl.h index 9d27f71dfa..54c5520603 100644 --- a/tensorflow/core/profiler/rpc/profiler_service_impl.h +++ b/tensorflow/core/profiler/rpc/profiler_service_impl.h @@ -25,7 +25,7 @@ limitations under the License. namespace tensorflow { std::unique_ptr CreateProfilerService( - ProfilerContext* const profiler_context); + const ProfilerContext& profiler_context); } // namespace tensorflow #endif // TENSORFLOW_CORE_PROFILER_RPC_PROFILER_SERVICE_IMPL_H_ -- GitLab From 220643ae0a5c9c34e53569e707f6015efb332702 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 15 Feb 2019 01:02:41 -0800 Subject: [PATCH 187/351] compat: Update forward compatibility horizon to 2019-02-15 PiperOrigin-RevId: 234101640 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 1a98e0725d..ef626fc41f 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 2, 14) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 2, 15) @tf_export("compat.forward_compatible") -- GitLab From 83bb7d278ccd5c32b18d1f502f6f0d77d80ab038 Mon Sep 17 00:00:00 2001 From: Pariksheet Pinjari Date: Fri, 15 Feb 2019 14:39:58 +0530 Subject: [PATCH 188/351] Removed warning in sparse_tensor.h Ensure the variable is initialized --- tensorflow/core/util/sparse/sparse_tensor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/sparse/sparse_tensor.h b/tensorflow/core/util/sparse/sparse_tensor.h index cd168f6597..4e53c59ba3 100644 --- a/tensorflow/core/util/sparse/sparse_tensor.h +++ b/tensorflow/core/util/sparse/sparse_tensor.h @@ -63,7 +63,7 @@ class SparseTensor { ix.shape().dim_size(0), ", values = ", vals.shape().dim_size(0), ")")); } - int dims; + int dims = 0; TF_RETURN_IF_ERROR(GetDimsFromIx(ix, &dims)); if (order.size() != dims) { return Status(error::INVALID_ARGUMENT, -- GitLab From 02252244f194dae479391a4dd017d7f76c9b6778 Mon Sep 17 00:00:00 2001 From: André Susano Pinto Date: Fri, 15 Feb 2019 01:36:50 -0800 Subject: [PATCH 189/351] If a TF Function has an input_signature automatically convert the inputs to it.
PiperOrigin-RevId: 234105123 --- tensorflow/python/eager/def_function.py | 4 +- tensorflow/python/eager/function.py | 98 +++++++++++++++------- tensorflow/python/eager/function_test.py | 101 +++++++++++++++++------ 3 files changed, 145 insertions(+), 58 deletions(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index a82b14ec2e..bffb439683 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -861,8 +861,8 @@ def function(func=None, def f(x): return tf.add(x, 1.) ``` - When an `input_signature` is specified, the callable will only accept `Tensor` - (or NumPy `ndarray`) objects as arguments. + When an `input_signature` is specified, the callable will convert the inputs + to the specified TensorSpecs. _Tracing and staging_ diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index c284a0a2b1..ce16e6a735 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -995,42 +995,82 @@ class FunctionSpec(object): # opposed to named arguments called in a keyword-like fashion. kwargs.pop(arg) inputs = args + _deterministic_dict_values(arg_indices_to_values) - flat_inputs = nest.flatten(inputs) - - # Check for NumPy arrays in arguments and convert them to Tensors. - # TODO(nareshmodi): Skip ndarray conversion to tensor altogether, perhaps - # finding a way to store them directly in the cache key (currently not - # possible since ndarrays are not hashable). 
- need_packing = False - for index, value in enumerate(flat_inputs): - if type(value) == np.ndarray: - flat_inputs[index] = constant_op.constant(value) - need_packing = True - if need_packing: - inputs = nest.pack_sequence_as( - structure=inputs, flat_sequence=flat_inputs) + if self._input_signature is None: + inputs = _convert_numpy_inputs(inputs) return inputs, kwargs else: assert not kwargs - signature_relevant_inputs = inputs[:len(self._input_signature)] - if not is_same_structure(self._input_signature, - signature_relevant_inputs): - raise ValueError("Structure of Python function inputs does not match " - "input_signature.") - signature_inputs_flat = nest.flatten(signature_relevant_inputs) - if any( - not pywrap_tensorflow.IsTensor(arg) for arg in signature_inputs_flat): - raise ValueError("When input_signature is provided, all inputs to " - "the Python function must be Tensors.") - if any(not spec.is_compatible_with(other) for spec, other in zip( - self._flat_input_signature, signature_inputs_flat)): - raise ValueError("Python inputs incompatible with input_signature: " - "inputs (%s), input_signature (%s)" % - (str(inputs), str(self._input_signature))) + inputs = _convert_inputs_to_signature( + inputs, + self._input_signature, + self._flat_input_signature) return inputs, {} +def _convert_numpy_inputs(inputs): + """Convert numpy array inputs to tensors.""" + flat_inputs = nest.flatten(inputs) + + # Check for NumPy arrays in arguments and convert them to Tensors. + # TODO(nareshmodi): Skip ndarray conversion to tensor altogether, perhaps + # finding a way to store them directly in the cache key (currently not + # possible since ndarrays are not hashable). 
+ need_packing = False + for index, value in enumerate(flat_inputs): + if type(value) == np.ndarray: + flat_inputs[index] = constant_op.constant(value) + need_packing = True + if need_packing: + return nest.pack_sequence_as( + structure=inputs, flat_sequence=flat_inputs) + else: + return inputs + + +def _convert_inputs_to_signature(inputs, input_signature, flat_input_signature): + """Convert inputs to pass into a function with an explicit signature.""" + try: + # TODO(b/124370185): Use all elements as inputs to throw an error if there + # are ignored arguments. Calling with arguments that are not part of the + # signature should throw an error. + flatten_inputs = nest.flatten_up_to( + input_signature, + inputs[:len(input_signature)]) + except ValueError: + raise ValueError("Structure of Python function inputs does not match " + "input_signature. Inputs (%s), input_signature(%s)." % + (str(inputs), str(input_signature))) + + need_packing = False + for index, (value, spec) in enumerate(zip(flatten_inputs, + flat_input_signature)): + if not pywrap_tensorflow.IsTensor(value): + try: + flatten_inputs[index] = ops.convert_to_tensor( + value, dtype_hint=spec.dtype) + need_packing = True + except ValueError: + raise ValueError("When input_signature is provided, all inputs to " + "the Python function must be convertible to tensors." + "Inputs (%s), input_signature(%s)." % + (str(inputs), str(input_signature))) + + if any(not spec.is_compatible_with(other) for spec, other in zip( + flat_input_signature, + flatten_inputs)): + raise ValueError("Python inputs incompatible with input_signature: " + "inputs (%s), input_signature (%s)" % + (str(inputs), str(input_signature))) + + if need_packing: + inputs = nest.pack_sequence_as( + structure=input_signature, + flat_sequence=flatten_inputs) + + return inputs + + class Function(object): """Wrapper class for the graph functions defined for a Python function. 
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index bd041d09ca..7c796ae218 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1319,7 +1319,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): self.assertEqual(func_b.numpy(), b) self.assertEqual(func_c.numpy(), c) - def testInputSignatureWithCompatibleInputs(self): + def testInputSignatureWithMatchingInputs(self): def foo(a): self.assertEqual(a.shape, (2,)) @@ -1353,22 +1353,46 @@ class FunctionTest(test.TestCase, parameterized.TestCase): self.assertLen(defined._function_cache, 1) self.assertAllEqual(out, b) + def testInputSignatureWithCompatibleInputs(self): + + rank2_spec = tensor_spec.TensorSpec(shape=(None, None), + dtype=dtypes.float32) + + @function.defun(input_signature=[rank2_spec]) + def func(a): + self.assertEqual([None, None], a.shape.as_list()) + return array_ops.shape(a) + + self.assertAllEqual([3, 1], func([[0], [1.0], [1]])) + self.assertAllEqual([2, 2], func(numpy.array([[1, 1], [2, 2]]))) + + with self.assertRaisesRegexp(ValueError, 'incompatible'): + func([0.0, 1.0, 2.0]) # Wrong shape. 
+ + with self.assertRaisesRegexp(ValueError, 'incompatible'): + func([['wrong dtype']]) + def testNestedInputSignatures(self): + def expected_foo(a, b): + return [a, b] + + @function.defun(input_signature=[ + [tensor_spec.TensorSpec((2, None), dtypes.float32)] * 2, + tensor_spec.TensorSpec((1,), dtypes.float32), + ]) def foo(a, b): self.assertEqual(a[0]._shape_tuple(), (2, None)) self.assertEqual(a[1]._shape_tuple(), (2, None)) self.assertEqual(b._shape_tuple(), (1,)) return [a, b] - signature = [[tensor_spec.TensorSpec((2, None), dtypes.float32)] * 2, - tensor_spec.TensorSpec((1,), dtypes.float32)] - defined = function.defun(foo, input_signature=signature) a = array_ops.ones([2, 1]) b = array_ops.ones([1]) - out = defined([a, a], b) - self.assertLen(defined._function_cache, 1) - nest.assert_same_structure(out, [[a, a], b]) + expected = expected_foo([a, a], b) + out = foo([a, a], b) + self.assertLen(foo._function_cache, 1) + nest.assert_same_structure(out, expected) self.assertAllEqual(out[0][0], a) self.assertAllEqual(out[0][1], a) self.assertAllEqual(out[1], b) @@ -1377,33 +1401,58 @@ class FunctionTest(test.TestCase, parameterized.TestCase): a = array_ops.ones([2, 3]) b = array_ops.ones([2, 5]) c = array_ops.ones([1]) - out = defined([a, b], c) - self.assertLen(defined._function_cache, 1) - nest.assert_same_structure(out, [[a, b], c]) + expected = expected_foo([a, b], c) + out = foo([a, b], c) + self.assertLen(foo._function_cache, 1) + nest.assert_same_structure(out, expected) + self.assertAllEqual(out[0][0], a) + self.assertAllEqual(out[0][1], b) + self.assertAllEqual(out[1], c) + + # Passing compatible inputs should work. 
+ a = a.numpy().tolist() + b = b.numpy().tolist() + c = c.numpy().tolist() + out = foo([a, b], c) + self.assertLen(foo._function_cache, 1) + nest.assert_same_structure(out, expected) self.assertAllEqual(out[0][0], a) self.assertAllEqual(out[0][1], b) self.assertAllEqual(out[1], c) + def testNestedInputSignaturesWithDict(self): + def expected_bar(a): + return a + + @function.defun(input_signature=[{ + 'a': tensor_spec.TensorSpec((2, None), dtypes.float32), + 'b': tensor_spec.TensorSpec((2, None), dtypes.float32), + 'c': tensor_spec.TensorSpec((1,), dtypes.float32)}]) def bar(a): self.assertEqual(a['a']._shape_tuple(), (2, None)) self.assertEqual(a['b']._shape_tuple(), (2, None)) self.assertEqual(a['c']._shape_tuple(), (1,)) return a - signature = [{ - 'a': tensor_spec.TensorSpec((2, None), dtypes.float32), - 'b': tensor_spec.TensorSpec((2, None), dtypes.float32), - 'c': tensor_spec.TensorSpec((1,), dtypes.float32) - }] a = array_ops.ones([2, 3]) b = array_ops.ones([1]) inputs = {'a': a, 'b': a, 'c': b} - defined = def_function.function(bar, input_signature=signature) - out = defined(inputs) - nest.assert_same_structure(out, inputs) - self.assertAllEqual(out['a'], inputs['a']) - self.assertAllEqual(out['b'], inputs['b']) - self.assertAllEqual(out['c'], inputs['c']) + expected = expected_bar(inputs) + out = bar(inputs) + nest.assert_same_structure(out, expected) + self.assertAllEqual(out['a'], expected['a']) + self.assertAllEqual(out['b'], expected['b']) + self.assertAllEqual(out['c'], expected['c']) + + # Passing compatible inputs should work. 
+ a = a.numpy().tolist() + b = b.numpy().tolist() + inputs = {'a': a, 'b': a, 'c': b} + out = bar(inputs) + nest.assert_same_structure(out, expected) + self.assertAllEqual(out['a'], expected['a']) + self.assertAllEqual(out['b'], expected['b']) + self.assertAllEqual(out['c'], expected['c']) def testInputSignatureMustBeSequenceOfTensorSpecs(self): @@ -1503,7 +1552,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase): self.assertEqual(func_b.numpy(), b) self.assertEqual(func_c.numpy(), c) - def testInputSignatureForFunctionWithNonTensorInputsNotAllowed(self): + def testInputSignatureConversionWithDefaultArg(self): def foo(a, training=True): if training: @@ -1517,11 +1566,9 @@ class FunctionTest(test.TestCase, parameterized.TestCase): ] defined = def_function.function(foo, input_signature=signature) a = constant_op.constant(1.0) - with self.assertRaisesRegexp( - ValueError, - 'When input_signature is provided, all inputs to ' - 'the Python function must be Tensors.'): - defined(a, training=True) + self.assertAllEqual(a.numpy(), defined(a)) + self.assertAllEqual(a.numpy(), defined(a, training=True)) + self.assertAllEqual(-a.numpy(), defined(a, training=False)) def testInputSignatureWithKeywordPositionalArgs(self): -- GitLab From 8786cf595c561c2bfe3494d060596db4f276ccd9 Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Fri, 15 Feb 2019 02:56:09 -0800 Subject: [PATCH 190/351] Don't enter module name scope for __getattr__ and friends. 
PiperOrigin-RevId: 234112456 --- tensorflow/python/module/module.py | 6 +++++- tensorflow/python/module/module_test.py | 28 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/module/module.py b/tensorflow/python/module/module.py index 53b410985d..6e58bb2a70 100644 --- a/tensorflow/python/module/module.py +++ b/tensorflow/python/module/module.py @@ -38,7 +38,11 @@ class ModuleMetaclass(type): def __new__(mcs, name, bases, clsdict): for key, value in clsdict.items(): - if key in ("__init__", "name_scope"): + if key == "name_scope": + continue + + elif key.startswith("__") and key != "__call__": + # Don't patch methods like `__getattr__` or `__del__`. continue elif tf_inspect.isfunction(value): diff --git a/tensorflow/python/module/module_test.py b/tensorflow/python/module/module_test.py index 62d9fe1237..cd52954971 100644 --- a/tensorflow/python/module/module_test.py +++ b/tensorflow/python/module/module_test.py @@ -135,6 +135,34 @@ class TestModuleNaming(test.TestCase): self.assertEqual("", get_name_scope()) + def test_get_attr_doesnt_enter_name_scope(self): + scope_names = [] + + class GetAttrModule(module.Module): + + def __getattr__(self, name): + scope_names.append((name, get_name_scope())) + return super(GetAttrModule, self).__getattr__(name) + + mod = GetAttrModule() + with self.assertRaises(AttributeError): + mod.does_not_exist # pylint: disable=pointless-statement + self.assertIn(("does_not_exist", ""), scope_names) + + def test_get_attribute_doesnt_enter_name_scope(self): + scope_names = [] + + class GetAttributeModule(module.Module): + + def __getattribute__(self, name): + scope_names.append((name, get_name_scope())) + return super(GetAttributeModule, self).__getattribute__(name) + + mod = GetAttributeModule() + with self.assertRaises(AttributeError): + mod.does_not_exist # pylint: disable=pointless-statement + self.assertIn(("does_not_exist", ""), scope_names) + class 
VariableNamingTest(test.TestCase): -- GitLab From 5d6f896ed5c23f74184191ee2b3bef39466e4ddb Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 15 Feb 2019 03:00:20 -0800 Subject: [PATCH 191/351] [XLA:GPU] Don't generate illegal LLVM IR for integral dot operations PiperOrigin-RevId: 234112776 --- tensorflow/compiler/xla/service/gpu/ir_emitter.cc | 11 +++++++++-- .../compiler/xla/tests/dot_operation_test.cc | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index cb13e727a4..8f010ab27a 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -492,8 +492,11 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { result = llvm::ConstantAggregateZero::get(lhs_array.GetElementLlvmType()); result = InsertValue(result, value.first, {0}); result = InsertValue(result, value.second, {1}); - } else { + } else if (ShapeUtil::ElementIsFloating(lhs_shape)) { result = FMul(lhs_value, rhs_value); + } else { + TF_RET_CHECK(ShapeUtil::ElementIsIntegral(lhs_shape)); + result = Mul(lhs_value, rhs_value); } target_array.EmitWriteArrayElement(/*index=*/element_index, result, &b_); return Status::OK(); @@ -583,9 +586,13 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { llvm::Value* accum_imag = Imag(accum, &b_); llvm::Value* imag_sum = FAdd(accum_imag, value.second); updated_accum = InsertValue(updated_accum, imag_sum, {1}); - } else { + } else if (ShapeUtil::ElementIsFloating(lhs_shape)) { llvm::Value* product = FMul(lhs_element, rhs_element); updated_accum = FAdd(accum, product); + } else { + TF_RET_CHECK(ShapeUtil::ElementIsIntegral(lhs_shape)); + llvm::Value* product = Mul(lhs_element, rhs_element); + updated_accum = Add(accum, product); } Store(updated_accum, accum_address); diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc 
b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 262b77264f..5d910a193d 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -1313,5 +1313,20 @@ ENTRY main { EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); } +XLA_TEST_F(DotOperationTextTest, DISABLED_ON_CPU(GpuIntegerDotCodegen)) { + absl::string_view hlo_string = + R"( +HloModule SmallIntegerDot + +ENTRY SmallIntegerDot { + arg0 = s32[1,2,2] parameter(0) + arg1 = s32[1,2,1] parameter(1) + ROOT dot = s32[1,2,1] dot(arg0, arg1), lhs_batch_dims={0}, lhs_contracting_dims={2}, rhs_batch_dims={0}, rhs_contracting_dims={1} +} +)"; + + EXPECT_TRUE(RunAndCompare(hlo_string, ErrorSpec{4e-3, 4e-3})); +} + } // namespace } // namespace xla -- GitLab From f8b35e00afe09c8606bcb0441a51be8bd38168d2 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Fri, 15 Feb 2019 03:39:39 -0800 Subject: [PATCH 192/351] Choose the correct output type for ResizeNearestNeighbour. For ResizeNearestNeighbour the output type should be the same as the input type. So far, we always chose output type F32, which is correct for ResizeBilinear, but not for ResizeNearestNeighbour. Finally replace Adds which are just broadcasts by BroadcastInDim. PiperOrigin-RevId: 234116435 --- .../tf2xla/kernels/image_resize_ops.cc | 145 ++++++++++-------- tensorflow/python/ops/image_ops_test.py | 4 +- 2 files changed, 85 insertions(+), 64 deletions(-) diff --git a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc index b96d45316f..d19d48e5dd 100644 --- a/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/image_resize_ops.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "absl/types/span.h" #include "tensorflow/compiler/tf2xla/type_util.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" @@ -134,14 +135,15 @@ int64 CalculateUpperPadding(int64 in_size, int64 out_size, int64 kernel_size, // If the 2D kernel would be very large, the 1D kernel can be applied once in // each dimension due to the symmetry of the kernel along all axis to reduce the // computational intensity. -xla::XlaOp MakeBilinear1DKernel(xla::XlaBuilder* builder, int64 n) { +xla::XlaOp MakeBilinear1DKernel(xla::XlaBuilder* builder, + xla::PrimitiveType type, int64 n) { std::vector kernel(n * 2 - 1); for (int64 i = 0; i < n; ++i) { float v = (i + 1.0f) / n; kernel[i] = v; kernel[n * 2 - 2 - i] = v; } - return xla::ConstantR1(builder, kernel); + return xla::ConvertElementType(xla::ConstantR1(builder, kernel), type); } // Unlike the bilinear kernel, which is triangular, the nearest neighbor @@ -153,11 +155,12 @@ xla::XlaOp MakeBilinear1DKernel(xla::XlaBuilder* builder, int64 n) { // to the right (because an existing non TPU kernel // for nearest neighbor resize already chose to default to the right, // so we want to be consistent). 
-xla::XlaOp MakeNearestNeighbor1DKernel(xla::XlaBuilder* builder, int64 n) { +xla::XlaOp MakeNearestNeighbor1DKernel(xla::XlaBuilder* builder, + xla::PrimitiveType type, int64 n) { std::vector kernel(n * 2 - 1, 0.0f); std::fill(&kernel[n / 2], &kernel[(3 * n) / 2], 1.0f); - return xla::ConstantR1(builder, kernel); + return xla::ConvertElementType(xla::ConstantR1(builder, kernel), type); } // Kernels with more than 16 spatial elements are considered intense and the @@ -165,42 +168,66 @@ xla::XlaOp MakeNearestNeighbor1DKernel(xla::XlaBuilder* builder, int64 n) { const int64 kMax2DKernelSize = 16; xla::XlaOp MakeGeneralResizeKernel(xla::XlaBuilder* builder, + xla::PrimitiveType type, absl::Span kernel_size, int64 channels, bool is_kernel_bilinear) { auto make_kernel_func = is_kernel_bilinear ? MakeBilinear1DKernel : MakeNearestNeighbor1DKernel; - auto depthwise_kernel = xla::Broadcast( - xla::Zero(builder, xla::F32), - {(2 * kernel_size[0] - 1), (2 * kernel_size[1] - 1), channels, 1}); + std::vector depthwise_kernel_sizes = { + (2 * kernel_size[0] - 1), (2 * kernel_size[1] - 1), channels, 1}; + auto depthwise_kernel = + xla::BroadcastInDim(make_kernel_func(builder, type, kernel_size[1]), + depthwise_kernel_sizes, /*broadcast_dimensions=*/{1}); - return xla::Mul( - xla::Add(depthwise_kernel, make_kernel_func(builder, kernel_size[1]), - /*broadcast_dimensions=*/{1}), - make_kernel_func(builder, kernel_size[0]), - /*broadcast_dimensions=*/{0}); + return xla::Mul(depthwise_kernel, + make_kernel_func(builder, type, kernel_size[0]), + /*broadcast_dimensions=*/{0}); } xla::XlaOp MakeGeneralResizeKernelInDim(xla::XlaBuilder* builder, + xla::PrimitiveType type, absl::Span kernel_size, int64 channels, int64 dim, bool is_kernel_bilinear) { auto make_kernel_func = is_kernel_bilinear ? MakeBilinear1DKernel : MakeNearestNeighbor1DKernel; - auto depthwise_kernel = - xla::Broadcast(xla::Zero(builder, xla::F32), - {dim == 0 ? (2 * kernel_size[0] - 1) : 1, - dim == 1 ? 
(2 * kernel_size[1] - 1) : 1, channels, 1}); - return xla::Add(depthwise_kernel, make_kernel_func(builder, kernel_size[dim]), - /*broadcast_dimensions=*/{dim}); + std::vector depthwise_kernel_sizes = { + dim == 0 ? (2 * kernel_size[0] - 1) : 1, + dim == 1 ? (2 * kernel_size[1] - 1) : 1, channels, 1}; + return xla::BroadcastInDim(make_kernel_func(builder, type, kernel_size[dim]), + depthwise_kernel_sizes, + /*broadcast_dimensions=*/{dim}); +} + +xla::XlaOp BroadcastSpatialDimensions(xla::XlaBuilder* builder, + const xla::XlaOp& input, + int32 spatial_dimensions_offset, + absl::Span in_size, + absl::Span out_size) { + // Add broadcasts to handle expanding from a size == 1 dimension to a + // size > 1 dimension. + auto broadcast_shape_or_status = builder->GetShape(input); + if (!broadcast_shape_or_status.ok()) { + return builder->ReportError(broadcast_shape_or_status.status()); + } + xla::Shape broadcast_shape = broadcast_shape_or_status.ValueOrDie(); + for (int32 i = 0; i < in_size.size(); ++i) { + if (in_size[i] == 1 && out_size[i] > 1) { + broadcast_shape.set_dimensions(spatial_dimensions_offset + i, + out_size[i]); + } + } + return xla::BroadcastInDim(input, broadcast_shape.dimensions(), + /*broadcast_dimensions=*/{0, 1, 2, 3}); } xla::XlaOp ResizeUsingDilationAndConvolution( - xla::XlaBuilder* builder, const xla::XlaOp& input, - const int num_spatial_dims, std::vector in_size, - std::vector out_size, const int64 channels, const bool align_corners, - bool is_kernel_bilinear) { + xla::XlaBuilder* builder, const xla::XlaOp& input, xla::PrimitiveType type, + const int num_spatial_dims, absl::Span in_size, + absl::Span out_size, const int64 channels, + const bool align_corners, bool is_kernel_bilinear) { // Picture for a 1x3 to 1x4 bilinear resize: // stride = 2, kernel size = 3 // Input: @@ -287,7 +314,7 @@ xla::XlaOp ResizeUsingDilationAndConvolution( // Split convolutions into independent dimensions if they would be a very // large kernel. 
if (dims.kernel_size[0] * dims.kernel_size[1] < kMax2DKernelSize) { - xla::XlaOp kernel = MakeGeneralResizeKernel(builder, dims.kernel_size, + xla::XlaOp kernel = MakeGeneralResizeKernel(builder, type, dims.kernel_size, channels, is_kernel_bilinear); output = xla::ConvGeneralDilated(input_data, kernel, dims.stride, @@ -299,7 +326,7 @@ xla::XlaOp ResizeUsingDilationAndConvolution( /*feature_group_count=*/channels); } else { xla::XlaOp kernel0 = MakeGeneralResizeKernelInDim( - builder, dims.kernel_size, channels, 0, is_kernel_bilinear); + builder, type, dims.kernel_size, channels, 0, is_kernel_bilinear); output = xla::ConvGeneralDilated( input_data, kernel0, {dims.stride[0], 1}, /*padding=*/ @@ -308,7 +335,7 @@ xla::XlaOp ResizeUsingDilationAndConvolution( /*rhs_dilation=*/{1, 1}, dimension_numbers, /*feature_group_count=*/channels); xla::XlaOp kernel1 = MakeGeneralResizeKernelInDim( - builder, dims.kernel_size, channels, 1, is_kernel_bilinear); + builder, type, dims.kernel_size, channels, 1, is_kernel_bilinear); output = xla::ConvGeneralDilated( output, kernel1, {1, dims.stride[1]}, /*padding=*/ @@ -320,19 +347,14 @@ xla::XlaOp ResizeUsingDilationAndConvolution( // Add broadcasts to handle expanding from a size == 1 dimension to a // size > 1 dimension. 
- for (int i = 0; i < num_spatial_dims; ++i) { - if (in_size[i] == 1 && out_size[i] > 1) { - output = xla::Add(output, xla::ConstantR1(builder, out_size[i], 0), - /*broadcast_dimensions=*/{1 + i}); - } - } - return output; + return BroadcastSpatialDimensions( + builder, output, /*spatial_dimensions_offset=*/1, in_size, out_size); } xla::XlaOp ResizeUsingDilationAndConvolutionGradOp( - xla::XlaBuilder* builder, const xla::XlaOp& grad, - const int num_spatial_dims, std::vector in_size, - std::vector grad_size, const int64 channels, + xla::XlaBuilder* builder, const xla::XlaOp& grad, xla::PrimitiveType type, + const int num_spatial_dims, absl::Span in_size, + absl::Span grad_size, const int64 channels, const bool align_corners, bool is_kernel_bilinear) { ResizeConvolutionDims dims = ComputeResizeConvolutionParameters(in_size, grad_size, align_corners); @@ -353,19 +375,14 @@ xla::XlaOp ResizeUsingDilationAndConvolutionGradOp( dimension_numbers.set_kernel_output_feature_dimension(num_spatial_dims); xla::XlaOp output; if (dims.kernel_size[0] * dims.kernel_size[1] < kMax2DKernelSize) { - xla::XlaOp kernel = MakeGeneralResizeKernel(builder, dims.kernel_size, + xla::XlaOp kernel = MakeGeneralResizeKernel(builder, type, dims.kernel_size, channels, is_kernel_bilinear); // Broadcast the input kernel where the forward op expanded from a size == 1 // dimension to a size > 1 dimension. This has the effect of summing the // gradient contributions in that dimension. 
- for (int i = 0; i < num_spatial_dims; ++i) { - if (in_size[i] == 1 && grad_size[i] > 1) { - kernel = - xla::Add(kernel, xla::ConstantR1(builder, grad_size[i], 0), - /*broadcast_dimensions=*/{i}); - } - } + kernel = BroadcastSpatialDimensions( + builder, kernel, /*spatial_dimensions_offset=*/0, in_size, grad_size); output = xla::ConvGeneralDilated( grad, kernel, /*window_strides=*/dims.kernel_size, @@ -377,22 +394,22 @@ xla::XlaOp ResizeUsingDilationAndConvolutionGradOp( /*feature_group_count=*/channels); } else { xla::XlaOp kernel0 = MakeGeneralResizeKernelInDim( - builder, dims.kernel_size, channels, 0, is_kernel_bilinear); + builder, type, dims.kernel_size, channels, 0, is_kernel_bilinear); xla::XlaOp kernel1 = MakeGeneralResizeKernelInDim( - builder, dims.kernel_size, channels, 1, is_kernel_bilinear); + builder, type, dims.kernel_size, channels, 1, is_kernel_bilinear); // Broadcast the input kernel where the forward op expanded from a // size == 1 dimension to a size > 1 dimension. This has the effect of // summing the gradient contributions in that dimension. 
if (in_size[0] == 1 && grad_size[0] > 1) { - kernel0 = - xla::Add(kernel0, xla::ConstantR1(builder, grad_size[0], 0), - /*broadcast_dimensions=*/{0}); + kernel0 = BroadcastSpatialDimensions(builder, kernel0, + /*spatial_dimensions_offset=*/0, {1}, + {grad_size[0]}); } if (in_size[1] == 1 && grad_size[1] > 1) { - kernel1 = - xla::Add(kernel0, xla::ConstantR1(builder, grad_size[1], 0), - /*broadcast_dimensions=*/{1}); + kernel1 = BroadcastSpatialDimensions(builder, kernel0, + /*spatial_dimensions_offset=*/0, + in_size, grad_size); } output = xla::ConvGeneralDilated( @@ -423,7 +440,7 @@ xla::XlaOp ResizeUsingDilationAndConvolutionGradOp( } } if (pad_output) { - output = xla::Pad(output, xla::ConstantR0(builder, 0.0f), padding); + output = xla::Pad(output, xla::Zero(builder, type), padding); } return output; } @@ -458,6 +475,7 @@ void GeneralCompile(XlaOpKernelContext* ctx, bool align_corners_, const int num_spatial_dims = 2; xla::XlaOp input = ctx->Input(0); + xla::PrimitiveType input_type = ctx->input_xla_type(0); // If in_size[i] > 1 and out_size[i] == 1, slice out the first input in // dimension i. @@ -475,8 +493,11 @@ void GeneralCompile(XlaOpKernelContext* ctx, bool align_corners_, {batch, in_size[0], in_size[1], channels}, {1, 1, 1, 1}); } - // Output is always type float. - input = xla::ConvertElementType(input, xla::F32); + // Output is always type float if 'is_kernel_bilinear' is true. 
+ if (is_kernel_bilinear) { + input = xla::ConvertElementType(input, xla::F32); + input_type = xla::F32; + } // Special Case: // Instead of doing a ResizeUsingDilationAndConvolution directly, @@ -504,19 +525,19 @@ void GeneralCompile(XlaOpKernelContext* ctx, bool align_corners_, std::vector next_out_size = {(in_size[0] - 1) * 2 + 1, (in_size[1] - 1) * 2 + 1}; output = ResizeUsingDilationAndConvolution( - b, input, num_spatial_dims, in_size, next_out_size, channels, - align_corners_, is_kernel_bilinear); + b, input, input_type, num_spatial_dims, in_size, next_out_size, + channels, align_corners_, is_kernel_bilinear); input = output; in_size = next_out_size; } else { output = ResizeUsingDilationAndConvolution( - b, input, num_spatial_dims, in_size, out_size, channels, + b, input, input_type, num_spatial_dims, in_size, out_size, channels, align_corners_, is_kernel_bilinear); in_size = out_size; } } else { output = ResizeUsingDilationAndConvolution( - b, input, num_spatial_dims, in_size, out_size, channels, + b, input, input_type, num_spatial_dims, in_size, out_size, channels, align_corners_, is_kernel_bilinear); in_size = out_size; } @@ -631,19 +652,19 @@ class ResizeBilinearGradOp : public XlaOpKernel { std::vector next_grad_size = {(in_size[0] - 1) * 2 + 1, (in_size[1] - 1) * 2 + 1}; output = ResizeUsingDilationAndConvolutionGradOp( - b, grad, num_spatial_dims, in_size, next_grad_size, channels, - align_corners_, true); + b, grad, xla::F32, num_spatial_dims, in_size, next_grad_size, + channels, align_corners_, true); grad = output; in_size = next_grad_size; } else { output = ResizeUsingDilationAndConvolutionGradOp( - b, grad, num_spatial_dims, in_size, grad_size, channels, + b, grad, xla::F32, num_spatial_dims, in_size, grad_size, channels, align_corners_, true); in_size = grad_size; } } else { output = ResizeUsingDilationAndConvolutionGradOp( - b, grad, num_spatial_dims, in_size, grad_size, channels, + b, grad, xla::F32, num_spatial_dims, in_size, grad_size, 
channels, align_corners_, true); in_size = grad_size; } diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 75fa73e1e4..490e80e09d 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -2469,7 +2469,6 @@ class ResizeImagesTest(test_util.TensorFlowTestCase): [1, target_height, target_width, 1]) self.assertAllClose(resized, expected, atol=1e-05) - @test_util.disable_xla("b/124291162") # Incorrect literal type def testResizeUpAlignCornersTrue(self): img_shape = [1, 3, 2, 1] data = [6, 3, 3, 6, 6, 9] @@ -3616,7 +3615,8 @@ class TotalVariationTest(test_util.TensorFlowTestCase): # If we negate all pixel-values then the total variation is unchanged. self._test(-a, tot_var) - # Scale the pixel-values by a float. This scales the total variation as well. + # Scale the pixel-values by a float. This scales the total variation as + # well. b = 1.1 * a self._test(b, 1.1 * tot_var) -- GitLab From 081618315c8cd7c87db1e83493473da3a9030dc5 Mon Sep 17 00:00:00 2001 From: Thomas Joerg Date: Fri, 15 Feb 2019 03:51:07 -0800 Subject: [PATCH 193/351] Add VLOGing for DeallocateRaw similar to AllocateRaw. 
PiperOrigin-RevId: 234117289 --- tensorflow/core/common_runtime/bfc_allocator.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/common_runtime/bfc_allocator.cc b/tensorflow/core/common_runtime/bfc_allocator.cc index c7e535cc80..2ca5d85e66 100644 --- a/tensorflow/core/common_runtime/bfc_allocator.cc +++ b/tensorflow/core/common_runtime/bfc_allocator.cc @@ -391,6 +391,7 @@ void BFCAllocator::SplitChunk(BFCAllocator::ChunkHandle h, size_t num_bytes) { } void BFCAllocator::DeallocateRaw(void* ptr) { + VLOG(1) << "DeallocateRaw " << Name() << " " << RequestedSize(ptr); DeallocateRawInternal(ptr); retry_helper_.NotifyDealloc(); } -- GitLab From 6f5961fa73902597785edaef1fdbe12c03711c2a Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Fri, 15 Feb 2019 13:51:47 +0100 Subject: [PATCH 194/351] Update doc for he_normal and lecun_normal according to issue 25564 --- tensorflow/python/ops/init_ops.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index c0b05b426d..caad28344f 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -1322,8 +1322,9 @@ def lecun_normal(seed=None): """LeCun normal initializer. It draws samples from a truncated normal distribution centered on 0 - with `stddev = sqrt(1 / fan_in)` - where `fan_in` is the number of input units in the weight tensor. + with standard deviation (after truncation) given by + `stddev = sqrt(1 / fan_in)` where `fan_in` is the number of + input units in the weight tensor. Arguments: seed: A Python integer. Used to seed the random generator. @@ -1372,8 +1373,9 @@ def he_normal(seed=None): """He normal initializer. It draws samples from a truncated normal distribution centered on 0 - with `stddev = sqrt(2 / fan_in)` - where `fan_in` is the number of input units in the weight tensor. 
+ with standard deviation (after truncation) given by + `stddev = sqrt(2 / fan_in)` where `fan_in` is the number of + input units in the weight tensor. Arguments: seed: A Python integer. Used to seed the random generator. -- GitLab From f26ea84fdbb13fff6b7979231db95dd20438645d Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Fri, 15 Feb 2019 05:07:29 -0800 Subject: [PATCH 195/351] [tf2xla] Remove MakeLinspaceTensor MakeLinspaceTensor is now unused. PiperOrigin-RevId: 234124584 --- tensorflow/compiler/tf2xla/xla_helpers.cc | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tensorflow/compiler/tf2xla/xla_helpers.cc b/tensorflow/compiler/tf2xla/xla_helpers.cc index 7ae903e14f..7bb1ad2746 100644 --- a/tensorflow/compiler/tf2xla/xla_helpers.cc +++ b/tensorflow/compiler/tf2xla/xla_helpers.cc @@ -81,16 +81,6 @@ xla::XlaOp XlaHelpers::FloatLiteral(xla::XlaBuilder* b, DataType data_type, return Status::OK(); } -template -static Tensor MakeLinspaceTensor(const TensorShape& shape, int64 depth) { - Tensor linspace(DataTypeToEnum::v(), shape); - auto linspace_flat = linspace.flat(); - for (int64 i = 0; i < depth; ++i) { - linspace_flat(i) = i; - } - return linspace; -} - Status XlaHelpers::OneHot(xla::XlaBuilder* builder, int64 depth, int axis, DataType index_type, const TensorShape& indices_shape, const xla::XlaOp& indices, const xla::XlaOp& on_value, -- GitLab From 789887e5bd4a74bc888fc192278578fb67cfc70a Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Fri, 15 Feb 2019 06:00:13 -0800 Subject: [PATCH 196/351] Don't drop the layout constraint when cloning a custom call. Also compare the layout constraints in IdenticalSlowPath(). 
PiperOrigin-RevId: 234129614 --- .../compiler/xla/service/hlo_instructions.cc | 15 +++++++++++++++ tensorflow/compiler/xla/tests/custom_call_test.cc | 6 ++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index f42b4afa5d..69e88dd4e8 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -2046,6 +2046,17 @@ bool HloCustomCallInstruction::IdenticalSlowPath( if (batch_group_count_ != casted_other.batch_group_count_) { return false; } + if (layout_constrained() != casted_other.layout_constrained()) { + return false; + } + if (layout_constrained()) { + for (int64 i = 0; i < operand_shapes_with_layout_.size(); ++i) { + if (!ShapeUtil::Equal(operand_shapes_with_layout_[i], + casted_other.operand_shapes_with_layout_[i])) { + return false; + } + } + } return custom_call_target_ == casted_other.custom_call_target_ && opaque_ == casted_other.opaque_; } @@ -2056,6 +2067,10 @@ HloCustomCallInstruction::CloneWithNewOperandsImpl( HloCloneContext* context) const { auto cloned = absl::make_unique( shape, new_operands, custom_call_target(), opaque()); + if (layout_constrained()) { + cloned->layout_constrained_ = true; + cloned->operand_shapes_with_layout_ = operand_shapes_with_layout(); + } if (window_ != nullptr) { cloned->set_window(*window_); } diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc index cad43d1b55..4687ed61a7 100644 --- a/tensorflow/compiler/xla/tests/custom_call_test.cc +++ b/tensorflow/compiler/xla/tests/custom_call_test.cc @@ -172,8 +172,10 @@ XLA_TEST_F(CustomCallTest, LayoutConstrained) { const Shape& r2f32_dim0_major = ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0}); - b.AddInstruction(HloInstruction::CreateCustomCall( + auto custom_call = b.AddInstruction(HloInstruction::CreateCustomCall( 
r2f32_dim0_major, {input}, "Add1ToValues", {r2f32_dim0_major})); + b.AddInstruction( + custom_call->CloneWithNewOperands(r2f32_dim0_major, {custom_call})); module->AddEntryComputation(b.Build()); ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0})); @@ -182,7 +184,7 @@ XLA_TEST_F(CustomCallTest, LayoutConstrained) { Literal argument = LiteralUtil::CreateR2({{1.f, 2.f}, {3.f, 4.f}}); Literal result = ExecuteAndTransfer(std::move(module), {&argument}); - LiteralTestUtil::ExpectR2Equal({{2.f, 3.f}, {4.f, 5.f}}, result); + LiteralTestUtil::ExpectR2Equal({{3.f, 4.f}, {5.f, 6.f}}, result); } XLA_TEST_F(CustomCallTest, TupleOutput) { -- GitLab From 8b41d6ea21fc8e9ddda2b5acfac0412cde96ae1e Mon Sep 17 00:00:00 2001 From: HyoukJoong Lee Date: Fri, 15 Feb 2019 06:19:17 -0800 Subject: [PATCH 197/351] Fix instruction postorder with channel instructions PiperOrigin-RevId: 234131715 --- tensorflow/compiler/xla/service/BUILD | 2 + .../compiler/xla/service/hlo_computation.cc | 107 ++++++++++-------- .../compiler/xla/service/hlo_computation.h | 14 +-- .../xla/service/hlo_computation_test.cc | 32 ++++++ .../compiler/xla/service/hlo_reachability.cc | 45 ++++++-- 5 files changed, 133 insertions(+), 67 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 33ac51ca4b..dc5ea1c1f6 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2204,6 +2204,8 @@ tf_cc_test( "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla/service:hlo_matchers", + "//tensorflow/compiler/xla/service:hlo_parser", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "@com_google_absl//absl/container:flat_hash_map", diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 40fe91398b..817e15f9ff
100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -296,7 +296,7 @@ void ComputeComputationPostOrder(HloComputation* computation, } // namespace void HloComputation::ComputeInstructionPostOrder( - const HloComputation::ChannelDependencyMap& channel_dependency_map, + const HloComputation::ChannelDependencyGroup& channel_dependency_group, std::vector* post_order, HloInstruction* root, absl::flat_hash_map* visited) const { std::vector dfs_stack; @@ -320,66 +320,75 @@ void HloComputation::ComputeInstructionPostOrder( visited->insert({current, kVisiting}); - // Add the operands to the stack in reverse order so the first operand is - // processed first. This will produce a more natural ordering and a nicer - // result for things like HLO stringification. - const auto& operands = current->operands(); - for (int64 i = operands.size() - 1; i >= 0; --i) { - dfs_stack.emplace_back(operands[i]); - } - - for (HloInstruction* op : current->control_predecessors()) { - dfs_stack.emplace_back(op); - } - - // Add inputs for send->recv_done dependencies and all-reduce - // dependencies. 
- switch (current->opcode()) { - case HloOpcode::kRecvDone: { - auto it = channel_dependency_map.find(current->channel_id()); - if (it != channel_dependency_map.end()) { - for (HloInstruction* op : it->second) { - dfs_stack.emplace_back(op); - } - } - break; + const auto get_channel_id = + [](HloInstruction* inst) -> absl::optional { + switch (inst->opcode()) { + case HloOpcode::kRecvDone: + return inst->channel_id(); + case HloOpcode::kAllReduce: + return inst->all_reduce_id(); + default: + return absl::nullopt; } - case HloOpcode::kAllReduce: { - auto all_reduce_id = current->all_reduce_id(); - if (all_reduce_id) { - auto it = channel_dependency_map.find(all_reduce_id.value()); - if (it != channel_dependency_map.end()) { - for (HloInstruction* op : it->second) { - dfs_stack.emplace_back(op); - } - } + }; + + // When adding a predecessor to the dfs_stack, we need to also add its + // associated channel dependencies. + const auto add_dfs_stack = [&](HloInstruction* inst) { + auto channel_id = get_channel_id(inst); + if (channel_id && channel_dependency_group.count(*channel_id)) { + auto it = channel_dependency_group.find(*channel_id); + for (HloInstruction* cinst : it->second) { + dfs_stack.emplace_back(cinst); } - break; + } else { + dfs_stack.emplace_back(inst); } - default: - break; + }; + + const auto add_predecessors = [&](HloInstruction* inst) { + // Add the operands to the stack in reverse order so the first operand is + // processed first. This will produce a more natural ordering and a nicer + // result for things like HLO stringification. + const auto& operands = inst->operands(); + for (int64 i = operands.size() - 1; i >= 0; --i) { + add_dfs_stack(operands[i]); + } + + for (HloInstruction* op : inst->control_predecessors()) { + add_dfs_stack(op); + } + }; + + // If the current instruction is a channel instruction, add the dependencies + // from all associated instructions of the channel. 
+ auto channel_id = get_channel_id(current); + if (channel_id && channel_dependency_group.count(*channel_id)) { + auto it = channel_dependency_group.find(*channel_id); + for (HloInstruction* cinst : it->second) { + add_predecessors(cinst); + } + } else { + add_predecessors(current); } } } -HloComputation::ChannelDependencyMap +HloComputation::ChannelDependencyGroup HloComputation::ComputeChannelDependencies() const { - ChannelDependencyMap channel_dependency_map; + ChannelDependencyGroup channel_dependency_group; for (const auto& instruction : instructions_) { switch (instruction->opcode()) { - case HloOpcode::kSend: { - channel_dependency_map[instruction->channel_id()].push_back( + case HloOpcode::kSend: + case HloOpcode::kRecvDone: + channel_dependency_group[instruction->channel_id()].push_back( instruction.get()); break; - } case HloOpcode::kAllReduce: { auto all_reduce_id = instruction->all_reduce_id(); if (all_reduce_id) { - auto& dependencies = channel_dependency_map[all_reduce_id.value()]; - absl::c_copy(instruction->operands(), - std::back_inserter(dependencies)); - absl::c_copy(instruction->control_predecessors(), - std::back_inserter(dependencies)); + channel_dependency_group[all_reduce_id.value()].push_back( + instruction.get()); } break; } @@ -387,11 +396,11 @@ HloComputation::ComputeChannelDependencies() const { break; } } - return channel_dependency_map; + return channel_dependency_group; } std::vector HloComputation::MakeInstructionPostOrder() const { - auto channel_dependency_map = ComputeChannelDependencies(); + auto channel_dependency_group = ComputeChannelDependencies(); std::vector post_order; post_order.reserve(instruction_count()); std::vector trace_instructions; @@ -404,7 +413,7 @@ std::vector HloComputation::MakeInstructionPostOrder() const { // users). 
trace_instructions.push_back(instruction.get()); } else if (instruction->users().empty()) { - ComputeInstructionPostOrder(channel_dependency_map, &post_order, + ComputeInstructionPostOrder(channel_dependency_group, &post_order, instruction.get(), &visited); } } diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index fd1f990431..212dfa15a1 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -369,13 +369,13 @@ class HloComputation { // channel complete). bool IsRemovable(const HloInstruction* instruction); - // Returns a map from channel-id to directed dependencies of the channel - // instructions. For send&recv pairs it means the send instruction and for - // all-reduce the union of the dependencies for all participating - // instructions. - using ChannelDependencyMap = + // Returns a map from channel-id to the group of instructions associated with + // the channel. These instructions will be considered as a single node for + // dependency purposes. Send and RecvDone are in the group, and AllReduces + // with the same channel id are in the group. + using ChannelDependencyGroup = absl::flat_hash_map>; - ChannelDependencyMap ComputeChannelDependencies() const; + ChannelDependencyGroup ComputeChannelDependencies() const; // Returns true if this computation has a side effect. A computation has a // side effect if it contains one or more instructions with a side effect. 
@@ -438,7 +438,7 @@ class HloComputation { enum VisitState { kVisiting, kVisited }; void ComputeInstructionPostOrder( - const HloComputation::ChannelDependencyMap& channel_dependency_map, + const HloComputation::ChannelDependencyGroup& channel_dependency_map, std::vector* post_order, HloInstruction* root, absl::flat_hash_map* visited) const; diff --git a/tensorflow/compiler/xla/service/hlo_computation_test.cc b/tensorflow/compiler/xla/service/hlo_computation_test.cc index 3b88e9745c..fe37ca6b39 100644 --- a/tensorflow/compiler/xla/service/hlo_computation_test.cc +++ b/tensorflow/compiler/xla/service/hlo_computation_test.cc @@ -24,7 +24,9 @@ limitations under the License. #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/pattern_matcher.h" #include "tensorflow/compiler/xla/service/pattern_matcher_gmock.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -37,6 +39,7 @@ namespace xla { namespace { namespace m = match; +namespace op = xla::testing::opcode_matchers; using ::testing::ElementsAre; using ::testing::UnorderedElementsAre; @@ -668,5 +671,34 @@ TEST_F(HloComputationTest, DeepEquality) { EXPECT_FALSE(*computation_c == *computation_b); } +// Tests that cross-module AllReduce instructions are ordered after all their +// predecessors and before all their successors.
+TEST_F(HloComputationTest, InstructionPostOrderWithAllReduce) { + const char* const hlo_string = R"( +HloModule Module + +add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) +} + +ENTRY entry { + param = f32[128] parameter(0), sharding={maximal device=0} + crs0 = f32[128] all-reduce(param), + replica_groups={{0}}, all_reduce_id=1, barrier="", to_apply=add, + sharding={maximal device=0} + crs1 = f32[128] all-reduce(param), + replica_groups={{0}}, all_reduce_id=1, barrier="", to_apply=add, + sharding={maximal device=1} + add = f32[128] add(crs0, crs0), sharding={maximal device=0} + ROOT t = (f32[128], f32[128]) tuple(add, crs1) +})"; + TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(hlo_string)); + EXPECT_THAT(module->entry_computation()->MakeInstructionPostOrder(), + ElementsAre(op::Parameter(), op::AllReduce(), op::AllReduce(), + op::Add(), op::Tuple())); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_reachability.cc b/tensorflow/compiler/xla/service/hlo_reachability.cc index 0fced7f15b..b7f507b118 100644 --- a/tensorflow/compiler/xla/service/hlo_reachability.cc +++ b/tensorflow/compiler/xla/service/hlo_reachability.cc @@ -77,28 +77,51 @@ std::unique_ptr HloReachabilityMap::Build( const HloComputation* computation) { const auto& all = computation->MakeInstructionPostOrder(); auto result = absl::make_unique(all); - auto channel_dependency_map = computation->ComputeChannelDependencies(); + auto channel_group = computation->ComputeChannelDependencies(); - std::vector inputs; for (const HloInstruction* hlo : all) { - inputs.assign(hlo->operands().begin(), hlo->operands().end()); - inputs.insert(inputs.end(), hlo->control_predecessors().begin(), - hlo->control_predecessors().end()); + std::vector inputs; + const auto add_input = [&channel_group, &inputs](HloInstruction* input) { + inputs.push_back(input); + if (input->opcode() == HloOpcode::kAllReduce && input->all_reduce_id()) { + 
auto it = channel_group.find(*input->all_reduce_id()); + if (it != channel_group.end()) { + inputs.insert(inputs.end(), it->second.begin(), it->second.end()); + } + } + }; + + const auto add_dependencies = [&add_input](const HloInstruction* hlo) { + for (HloInstruction* operand : hlo->operands()) { + add_input(operand); + } + for (HloInstruction* predecessor : hlo->control_predecessors()) { + add_input(predecessor); + } + }; + + add_dependencies(hlo); switch (hlo->opcode()) { case HloOpcode::kRecvDone: { - auto it = channel_dependency_map.find(hlo->channel_id()); - if (it != channel_dependency_map.end()) { - absl::c_copy(it->second, std::back_inserter(inputs)); + auto it = channel_group.find(hlo->channel_id()); + if (it != channel_group.end()) { + for (HloInstruction* channel : it->second) { + if (channel->opcode() == HloOpcode::kSend) { + add_input(channel); + } + } } break; } case HloOpcode::kAllReduce: { auto all_reduce_id = hlo->all_reduce_id(); if (all_reduce_id) { - auto it = channel_dependency_map.find(all_reduce_id.value()); - if (it != channel_dependency_map.end()) { - absl::c_copy(it->second, std::back_inserter(inputs)); + auto it = channel_group.find(all_reduce_id.value()); + if (it != channel_group.end()) { + for (HloInstruction* all_reduce : it->second) { + add_dependencies(all_reduce); + } } } break; -- GitLab From 4978c1e0280b6d49f58c06e2e40639f037efe9cc Mon Sep 17 00:00:00 2001 From: André Susano Pinto Date: Fri, 15 Feb 2019 06:34:17 -0800 Subject: [PATCH 198/351] Add test for missing functionality of shape inference when applying a function. This feature is necessary to be able to easily apply functions in graph mode for things like unknown batch size or variable image size/feature maps. Without it a user of a function with an input_signature in graph mode has to manually compute the shape and set it so the rest of the model building can take that into account.
PiperOrigin-RevId: 234133298 --- tensorflow/python/eager/function_test.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 7c796ae218..2e2e45cf7f 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -529,6 +529,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase): var_t = resource_variable_ops.read_variable_op(var_handle, dtype=v.dtype) self.assertEqual(var_t.shape, tensor_shape.TensorShape([2, 2])) + def testShapeInferenceForMoreSpecificInput(self): + self.skipTest('b/124219898') + + def f(a): + return array_ops.reshape(a, [-1, 3]) + + signature = [tensor_spec.TensorSpec(None, dtypes.float32)] + compiled = def_function.function(f, input_signature=signature) + + with ops.Graph().as_default(): + inputs = array_ops.zeros([10, 10, 3]) + self.assertAllEqual(f(inputs).shape, compiled(inputs).shape) + def testFuncListAttr(self): @function.defun -- GitLab From 0777564ff7ff0f6e0a50a14fbc8fe9dade97ab98 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 06:51:31 -0800 Subject: [PATCH 199/351] Remove boilerplate duplication in GPU SVD implementation.
PiperOrigin-RevId: 234135875 --- tensorflow/core/kernels/svd_op_gpu.cu.cc | 166 +++++++++-------------- 1 file changed, 61 insertions(+), 105 deletions(-) diff --git a/tensorflow/core/kernels/svd_op_gpu.cu.cc b/tensorflow/core/kernels/svd_op_gpu.cu.cc index 8c3a58b108..9e308cfc02 100644 --- a/tensorflow/core/kernels/svd_op_gpu.cu.cc +++ b/tensorflow/core/kernels/svd_op_gpu.cu.cc @@ -93,9 +93,48 @@ class SvdOpGpu : public AsyncOpKernel { } void RunSVD(OpKernelContext* context, DoneCallback done, int64 m, int64 n, - int64 p, int64 batch_size, Scalar* input_ptr, - RealScalar* outputS_ptr, Scalar* outputU_ptr, - Scalar* outputVT_ptr, int* dev_info_ptr, CudaSolver* solver) { + int64 p, Tensor& M_copy, Tensor* S, Tensor* U, Tensor* V, + std::unique_ptr solver) { + // Compute U S V* = M. + // 1. cuSolver works in column-major rather than row-major. + // 2. Gesvd returns V*. + // 3. Hence M should be transposed before input and U (rather than V) should + // be transposed on output. + + Tensor u_copy; + if (compute_uv_) { + TensorShape u_shape; + if (full_matrices_) { + u_shape = U->shape(); + } else { + TensorShape shapeRaw = M_copy.shape(); + shapeRaw.RemoveLastDims(2); + u_shape = shapeRaw; + u_shape.AddDim(p); + u_shape.AddDim(m); + } + OP_REQUIRES_OK_ASYNC( + context, solver->allocate_scoped_tensor(U->dtype(), u_shape, &u_copy), + done); + } + + // get the pointers to the data + Scalar* input_ptr; + RealScalar* outputS_ptr; + Scalar* outputU_ptr = NULL; + Scalar* outputV_ptr = NULL; + auto input_reshaped = M_copy.template flat_inner_dims(); + input_ptr = input_reshaped.data(); + outputS_ptr = S->template flat_inner_dims().data(); + if (compute_uv_) { + outputU_ptr = u_copy.template flat_inner_dims().data(); + outputV_ptr = V->template flat_inner_dims().data(); + } + const int64 batch_size = input_reshaped.dimension(0); + std::vector dev_info; + dev_info.push_back(solver->GetDeviceLapackInfo(batch_size, "gesvd")); + int* dev_info_ptr = dev_info.back().mutable_data(); + 
// Save the input matrix // Needed for the n=1 fix, see below, since SVD destroys the input Tensor input_copy; @@ -121,12 +160,12 @@ class SvdOpGpu : public AsyncOpKernel { if (compute_uv_) { if (full_matrices_) { outputU = outputU_ptr + batch * m * m; - outputVT = outputVT_ptr + batch * n * n; + outputVT = outputV_ptr + batch * n * n; jobu = 'A'; jobvt = 'A'; } else { outputU = outputU_ptr + batch * m * p; - outputVT = outputVT_ptr + batch * n * p; + outputVT = outputV_ptr + batch * n * p; jobu = 'S'; jobvt = 'S'; } @@ -155,17 +194,24 @@ class SvdOpGpu : public AsyncOpKernel { if (compute_uv_ && n == 1) { // 1. compute the (batched) sum const GPUDevice& d = context->eigen_device(); - d.memset(outputVT_ptr, 0, batch_size * sizeof(Scalar)); + d.memset(outputV_ptr, 0, batch_size * sizeof(Scalar)); Cuda2DLaunchConfig cfg2D = GetCuda2DLaunchConfig(batch_size, m, d); ComputeValueOfVKernel<<>>( cfg2D, m, full_matrices_ ? m : p, input_copy.flat().data(), - outputU_ptr, outputS_ptr, outputVT_ptr); + outputU_ptr, outputS_ptr, outputV_ptr); // 2. 
clamp V to -1 or +1 CudaLaunchConfig cfg1D = GetCudaLaunchConfig(batch_size, d); ExtractSignOfVKernel<<>>(cfg1D, outputVT_ptr); + d.stream()>>>(cfg1D, outputV_ptr); } + + if (compute_uv_) { + auto device = context->eigen_device(); + OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, u_copy, U), done); + } + + CheckResult(context, std::move(done), dev_info, std::move(solver)); } void CheckResult(OpKernelContext* context, DoneCallback done, @@ -192,10 +238,9 @@ class SvdOpGpu : public AsyncOpKernel { void PerformSVD_MgeqN(OpKernelContext* context, DoneCallback done, int64 m, int64 n, int64 p, const Tensor& M, Tensor* S, Tensor* U, Tensor* V) { + // Transpose M, because cuSolver expects it to be column-major TensorShape shapeRaw = M.shape(); shapeRaw.RemoveLastDims(2); - - // Transpose M, because cuSolver expects it to be column-major TensorShape input_shape = shapeRaw; input_shape.AddDim(n); input_shape.AddDim(m); @@ -210,58 +255,16 @@ class SvdOpGpu : public AsyncOpKernel { OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, M, &input_copy), done); - // I need to transpose U at the end - // Not V, because cuSolver work column-major - Tensor u_copy; - if (compute_uv_) { - TensorShape u_shape; - if (full_matrices_) { - u_shape = U->shape(); - } else { - u_shape = shapeRaw; - u_shape.AddDim(p); - u_shape.AddDim(m); - } - OP_REQUIRES_OK_ASYNC( - context, solver->allocate_scoped_tensor(U->dtype(), u_shape, &u_copy), - done); - } - - // get the pointers to the data - Scalar* input_ptr; - RealScalar* outputS_ptr; - Scalar* outputU_ptr = NULL; - Scalar* outputV_ptr = NULL; - auto input_reshaped = input_copy.template flat_inner_dims(); - input_ptr = input_reshaped.data(); - outputS_ptr = S->template flat_inner_dims().data(); - if (compute_uv_) { - outputU_ptr = u_copy.template flat_inner_dims().data(); - outputV_ptr = V->template flat_inner_dims().data(); - } - - // call the SVD - const int64 batch_size = input_reshaped.dimension(0); - std::vector dev_info; - 
dev_info.push_back(solver->GetDeviceLapackInfo(batch_size, "gesvd")); - RunSVD(context, done, m, n, p, batch_size, input_ptr, outputS_ptr, - outputU_ptr, outputV_ptr, dev_info.back().mutable_data(), - solver.get()); - - // Transpose U - if (compute_uv_) { - OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, u_copy, U), done); - } - - // now check if the SVD operation succeeded or not - CheckResult(context, std::move(done), dev_info, std::move(solver)); + // Call the SVD: compute U S V* = M. + RunSVD(context, done, m, n, p, input_copy, S, U, V, std::move(solver)); } // The SVD if m < n void PerformSVD_MlessN(OpKernelContext* context, DoneCallback done, int64 m, int64 n, int64 p, const Tensor& M, Tensor* S, Tensor* U, Tensor* V) { - // Perform the SVD on M' + // Perform the SVD on M'. cuSolver works column major so don't need to + // transpose M. // Reuse the input buffer or make a copy for the SVD depending on whether // this op owns the input buffer exclusively. This is needed because the @@ -281,55 +284,8 @@ class SvdOpGpu : public AsyncOpKernel { M.NumElements() * sizeof(Scalar)); } - // I need to transpose V at the end - Tensor v_copy; - if (compute_uv_) { - TensorShape v_shape; - if (full_matrices_) { - v_shape = V->shape(); - } else { - TensorShape shapeRaw = M.shape(); - shapeRaw.RemoveLastDims(2); - v_shape = shapeRaw; - v_shape.AddDim(p); - v_shape.AddDim(n); - } - OP_REQUIRES_OK_ASYNC( - context, solver->allocate_scoped_tensor(V->dtype(), v_shape, &v_copy), - done); - } - - // get the pointers to the data - Scalar* input_ptr; - RealScalar* outputS_ptr; - Scalar* outputU_ptr = NULL; - Scalar* outputV_ptr = NULL; - auto input_reshaped = input_copy.template flat_inner_dims(); - input_ptr = input_reshaped.data(); - outputS_ptr = S->template flat_inner_dims().data(); - if (compute_uv_) { - // Note that U and V are flipped - outputU_ptr = v_copy.template flat_inner_dims().data(); - outputV_ptr = U->template flat_inner_dims().data(); - } - - // call the SVD 
- const int64 batch_size = input_reshaped.dimension(0); - std::vector dev_info; - dev_info.push_back(solver->GetDeviceLapackInfo(batch_size, "gesvd")); - // Note that m and n are flipped - RunSVD(context, done, n, m, p, batch_size, input_ptr, outputS_ptr, - outputU_ptr, outputV_ptr, dev_info.back().mutable_data(), - solver.get()); - - // Transpose V - if (compute_uv_) { - auto device = context->eigen_device(); - OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, v_copy, V), done); - } - - // now check if the SVD operation succeeded or not - CheckResult(context, std::move(done), dev_info, std::move(solver)); + // Call the SVD: compute V S U* = M*. + RunSVD(context, done, n, m, p, input_copy, S, V, U, std::move(solver)); } void ComputeAsync(OpKernelContext* context, DoneCallback done) final { -- GitLab From 34cd80179154e1fc456b18195b267bddf5f1d1c0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 07:19:18 -0800 Subject: [PATCH 200/351] Wrap GesvdjBatched from cuSolver and associated helper utilities. PiperOrigin-RevId: 234139308 --- tensorflow/core/kernels/cuda_solvers.cc | 44 +++++++++++++++++++++++++ tensorflow/core/kernels/cuda_solvers.h | 5 +++ 2 files changed, 49 insertions(+) diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/kernels/cuda_solvers.cc index 39d0a998fd..82d92388d4 100644 --- a/tensorflow/core/kernels/cuda_solvers.cc +++ b/tensorflow/core/kernels/cuda_solvers.cc @@ -643,6 +643,50 @@ static inline Status GesvdImpl( TF_CALL_LAPACK_TYPES_NO_COMPLEX(GESVD_INSTANCE); +template +static inline Status GesvdjBatchedImpl(BufSizeFnT bufsize, SolverFnT solver, + CudaSolver* cuda_solver, + OpKernelContext* context, + cusolverDnHandle_t cusolver_dn_handle, + cusolverEigMode_t jobz, int m, int n, + Scalar* A, int lda, Scalar* S, Scalar* U, + int ldu, Scalar* V, int ldv, + int* dev_lapack_info, int batch_size) { + mutex_lock lock(handle_map_mutex); + /* Get amount of workspace memory required. 
*/ + int lwork; + /* Default parameters for gesvdj and gesvdjBatched. */ + gesvdjInfo_t svdj_info; + TF_RETURN_IF_CUSOLVER_ERROR(cusolverDnCreateGesvdjInfo(&svdj_info)); + TF_RETURN_IF_CUSOLVER_ERROR(bufsize( + cusolver_dn_handle, jobz, m, n, CUDAComplex(A), lda, S, CUDAComplex(U), + ldu, CUDAComplex(V), ldv, &lwork, svdj_info, batch_size)); + /* Allocate device memory for workspace. */ + auto dev_workspace = + cuda_solver->GetScratchSpace(lwork, "", /* on_host */ false); + TF_RETURN_IF_CUSOLVER_ERROR(solver( + cusolver_dn_handle, jobz, m, n, CUDAComplex(A), lda, S, CUDAComplex(U), + ldu, CUDAComplex(V), ldv, CUDAComplex(dev_workspace.mutable_data()), + lwork, dev_lapack_info, svdj_info, batch_size)); + TF_RETURN_IF_CUSOLVER_ERROR(cusolverDnDestroyGesvdjInfo(svdj_info)); + return Status::OK(); +} + +#define GESVDJBATCHED_INSTANCE(Scalar, type_prefix) \ + template <> \ + Status CudaSolver::GesvdjBatched( \ + cusolverEigMode_t jobz, int m, int n, Scalar* dev_A, int lda, \ + Scalar* dev_S, Scalar* dev_U, int ldu, Scalar* dev_V, int ldv, \ + int* dev_lapack_info, int batch_size) { \ + return GesvdjBatchedImpl(DN_BUFSIZE_FN(gesvdjBatched, type_prefix), \ + DN_SOLVER_FN(gesvdjBatched, type_prefix), this, \ + context_, cusolver_dn_handle_, jobz, m, n, dev_A, \ + lda, dev_S, dev_U, ldu, dev_V, ldv, \ + dev_lapack_info, batch_size); \ + } + +TF_CALL_LAPACK_TYPES_NO_COMPLEX(GESVDJBATCHED_INSTANCE); + //============================================================================= // Wrappers of cuBlas computational methods begin here. 
// diff --git a/tensorflow/core/kernels/cuda_solvers.h b/tensorflow/core/kernels/cuda_solvers.h index 1fc344731c..fa8b4e2415 100644 --- a/tensorflow/core/kernels/cuda_solvers.h +++ b/tensorflow/core/kernels/cuda_solvers.h @@ -312,6 +312,11 @@ class CudaSolver { Status Gesvd(signed char jobu, signed char jobvt, int m, int n, Scalar* dev_A, int lda, Scalar* dev_S, Scalar* dev_U, int ldu, Scalar* dev_VT, int ldvt, int* dev_lapack_info) TF_MUST_USE_RESULT; + template + Status GesvdjBatched(cusolverEigMode_t jobz, int m, int n, Scalar* dev_A, + int lda, Scalar* dev_S, Scalar* dev_U, int ldu, + Scalar* dev_V, int ldv, int* dev_lapack_info, + int batch_size); private: OpKernelContext* context_; // not owned. -- GitLab From 14abf581047502dc17b8fcbbc5e5ab8bac5311ec Mon Sep 17 00:00:00 2001 From: Smit Hinsu Date: Fri, 15 Feb 2019 07:31:48 -0800 Subject: [PATCH 201/351] Use function-local static pointer for globals in TensorRT C++ style guide forbids globals that are not trivially destructible. PiperOrigin-RevId: 234140821 --- tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc index 2ce41b82d2..3f4b3732b0 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc @@ -89,7 +89,7 @@ TrtCandidateSelector::TrtCandidateSelector( Status TrtCandidateSelector::IsTensorRTCandidate(const tensorflow::Node* node) { // TODO(laigd): move this set to TrtNodeValidator where it should belong. 
// LINT.IfChange - static const std::set candidate_ops = { + static const auto* candidate_ops = new std::set{ "Abs", "Add", "AvgPool", @@ -142,9 +142,9 @@ Status TrtCandidateSelector::IsTensorRTCandidate(const tensorflow::Node* node) { "Transpose", }; bool is_supported_op_type = - (candidate_ops.count(node->type_string()) || + (candidate_ops->count(node->type_string()) || PluginFactoryTensorRT::GetInstance()->IsPlugin(node->type_string())); - static const std::set quantize_ops = { + static const auto* quantize_ops = new std::set{ "QuantizeAndDequantizeV2", "QuantizeAndDequantizeV3", "FakeQuantWithMinMaxVars", @@ -154,7 +154,7 @@ Status TrtCandidateSelector::IsTensorRTCandidate(const tensorflow::Node* node) { // these ops to the relevant tensors. This happens regardless of the value of // use_calibration. if (precision_mode_ == TrtPrecisionMode::INT8 && - quantize_ops.count(node->type_string())) { + quantize_ops->count(node->type_string())) { is_supported_op_type = true; } // LINT.ThenChange(//tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc) -- GitLab From ad78b5ea16e0fe76d9eb4de93bc1bcf9729fbf5a Mon Sep 17 00:00:00 2001 From: Vojtech Bardiovsky Date: Fri, 15 Feb 2019 07:57:14 -0800 Subject: [PATCH 202/351] Add fine-tuning integration test. 
PiperOrigin-RevId: 234143934 --- .../export_simple_text_embedding.py | 44 ++++++++++++++----- .../use_model_in_sequential_keras.py | 37 ++++++++++------ 2 files changed, 58 insertions(+), 23 deletions(-) diff --git a/tensorflow/examples/saved_model/integration_tests/export_simple_text_embedding.py b/tensorflow/examples/saved_model/integration_tests/export_simple_text_embedding.py index 5149536592..a05dd3f3ab 100644 --- a/tensorflow/examples/saved_model/integration_tests/export_simple_text_embedding.py +++ b/tensorflow/examples/saved_model/integration_tests/export_simple_text_embedding.py @@ -18,16 +18,32 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os +import tempfile from absl import app from absl import flags import tensorflow as tf +# TODO(vbardiovsky): remove these when symbols are public. +from tensorflow.python.ops import lookup_ops +from tensorflow.python.training.checkpointable import tracking + FLAGS = flags.FLAGS flags.DEFINE_string("export_dir", None, "Directory to export SavedModel.") +def write_vocabulary_file(vocabulary): + """Write temporary vocab file for module construction.""" + tmpdir = tempfile.mkdtemp() + vocabulary_file = os.path.join(tmpdir, "tokens.txt") + with tf.io.gfile.GFile(vocabulary_file, "w") as f: + for entry in vocabulary: + f.write(entry + "\n") + return vocabulary_file + + class TextEmbeddingModel(tf.train.Checkpoint): """Text embedding model. @@ -35,10 +51,20 @@ class TextEmbeddingModel(tf.train.Checkpoint): sentence embedding. 
""" - def __init__(self, emb_dim, buckets): + def __init__(self, vocabulary, emb_dim, oov_buckets): super(TextEmbeddingModel, self).__init__() - self._buckets = buckets - self._embeddings = tf.Variable(tf.random.uniform(shape=[buckets, emb_dim])) + self._oov_buckets = oov_buckets + self._vocabulary_file = tracking.TrackableAsset( + write_vocabulary_file(vocabulary)) + self._total_size = len(vocabulary) + oov_buckets + self._table = lookup_ops.index_table_from_file( + vocabulary_file=self._vocabulary_file, + num_oov_buckets=self._oov_buckets, + hasher_spec=lookup_ops.FastHashSpec) + self.embeddings = tf.Variable( + tf.random.uniform(shape=[self._total_size, emb_dim])) + self.variables = [self.embeddings] + self.trainable_variables = self.variables def _tokenize(self, sentences): # Perform a minimalistic text preprocessing by removing punctuation and @@ -52,19 +78,16 @@ class TextEmbeddingModel(tf.train.Checkpoint): sparse_tokens, _ = tf.sparse.fill_empty_rows(sparse_tokens, tf.constant("")) # Deal with a corner case: all sentences are empty. 
sparse_tokens = tf.sparse.reset_shape(sparse_tokens) + sparse_token_ids = self._table.lookup(sparse_tokens.values) - return (sparse_tokens.indices, self._words_to_indices(sparse_tokens.values), - sparse_tokens.dense_shape) - - def _words_to_indices(self, words): - return tf.strings.to_hash_bucket(words, self._buckets) + return (sparse_tokens.indices, sparse_token_ids, sparse_tokens.dense_shape) @tf.function(input_signature=[tf.TensorSpec([None], tf.dtypes.string)]) def __call__(self, sentences): token_ids, token_values, token_dense_shape = self._tokenize(sentences) return tf.nn.safe_embedding_lookup_sparse( - embedding_weights=self._embeddings, + embedding_weights=self.embeddings, sparse_ids=tf.SparseTensor(token_ids, token_values, token_dense_shape), sparse_weights=None, combiner="sqrtn") @@ -73,7 +96,8 @@ class TextEmbeddingModel(tf.train.Checkpoint): def main(argv): del argv - module = TextEmbeddingModel(emb_dim=10, buckets=100) + vocabulary = ["cat", "is", "on", "the", "mat"] + module = TextEmbeddingModel(vocabulary=vocabulary, emb_dim=10, oov_buckets=10) tf.saved_model.save(module, FLAGS.export_dir) diff --git a/tensorflow/examples/saved_model/integration_tests/use_model_in_sequential_keras.py b/tensorflow/examples/saved_model/integration_tests/use_model_in_sequential_keras.py index cf9397f002..6c8aab8e23 100644 --- a/tensorflow/examples/saved_model/integration_tests/use_model_in_sequential_keras.py +++ b/tensorflow/examples/saved_model/integration_tests/use_model_in_sequential_keras.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Load and use RNN model stored as a SavedModel.""" +"""Load and use text embedding module in sequential Keras.""" from __future__ import absolute_import from __future__ import division @@ -24,7 +24,7 @@ from absl import flags import numpy as np import tensorflow as tf -# TODO(vbardiovsky): Remove when load is available. +# TODO(vbardiovsky): Remove when load symbol is public. from tensorflow.examples.saved_model.integration_tests import util from tensorflow.python.saved_model.load import load @@ -35,28 +35,39 @@ FLAGS = flags.FLAGS flags.DEFINE_string("model_dir", None, "Directory to load SavedModel from.") -def main(argv): - del argv - +def train(fine_tuning): + """Build a Keras model and train with mock data.""" features = np.array(["my first sentence", "my second sentence"]) labels = np.array([1, 0]) - dataset = tf.data.Dataset.from_tensor_slices((features, labels)) - embed = tf.saved_model.load(FLAGS.model_dir) + module = tf.saved_model.load(FLAGS.model_dir) # Create the sequential keras model. + l = tf.keras.layers model = tf.keras.Sequential() - model.add(util.CustomLayer(embed, batch_input_shape=[None], - output_shape=[10], dtype=tf.string)) - model.add(tf.keras.layers.Dense(100, activation="relu")) - model.add(tf.keras.layers.Dense(50, activation="relu")) - model.add(tf.keras.layers.Dense(1, activation="sigmoid")) + model.add(l.Reshape((), batch_input_shape=[None, 1], dtype=tf.string)) + model.add(util.CustomLayer(module, output_shape=[10], trainable=fine_tuning)) + model.add(l.Dense(100, activation="relu")) + model.add(l.Dense(50, activation="relu")) + model.add(l.Dense(1, activation="sigmoid")) + model.compile( - optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]) + optimizer="adam", + loss="binary_crossentropy", + metrics=["accuracy"], + # TODO(b/124446120): Remove after fixed. 
+ run_eagerly=True) model.fit_generator(generator=dataset.batch(1), epochs=5) +def main(argv): + del argv + + train(fine_tuning=False) + train(fine_tuning=True) + + if __name__ == "__main__": app.run(main) -- GitLab From 1258797b1110c46b4db33d49b0569d3e721b4e8f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 08:31:45 -0800 Subject: [PATCH 203/351] Improvements to the existing custom NEON path for 8bit quantized GEMV: 1. Drop the requirement that the output depth be a multiple of 4. We still process groups of 4 rows at a time, but if there remains a few rows at the end, we process the last 4 rows, possibly re-processing some already-processed rows. 2. Also use this fast GEMV path in Conv, not just in FullyConnected. Indeed, in some newer models, we see GEMV's being encoded as Conv instead of FullyConnected nodes. (Seen in MobileNet v2 tflite files). PiperOrigin-RevId: 234148907 --- .../internal/optimized/optimized_ops.h | 51 ++++++++++++------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index 1c0798c37c..f46ed0f3bf 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -867,10 +867,10 @@ inline void FullyConnectedAsGEMV( for (int k = 0; k < kPeel * input_size; k += 64) { optimized_ops_preload_l1_stream(filter_data + k); } - TFLITE_DCHECK(!(output_size % kPeel)); - const int32* bias_ptr = bias_data; - uint8* output_ptr = output_data; + TFLITE_DCHECK_GE(output_size, kPeel); + for (int out = 0; out < output_size; out += kPeel) { + out = std::min(out, output_size - kPeel); int32x4_t acc[kPeel]; for (int k = 0; k < kPeel; k++) { acc[k] = vdupq_n_s32(0); @@ -969,8 +969,7 @@ inline void FullyConnectedAsGEMV( vpadd_s32(pairwise_reduced_acc[2], pairwise_reduced_acc[3]); int32x4_t reduced = vcombine_s32(reduced_lo, 
reduced_hi); // Add bias values. - int32x4_t bias_vec = vld1q_s32(bias_ptr); - bias_ptr += 4; + int32x4_t bias_vec = vld1q_s32(bias_data + out); reduced = vaddq_s32(reduced, bias_vec); if (shift_left) { const int32 multiplier_power_of_two = 1 << output_shift; @@ -993,10 +992,11 @@ inline void FullyConnectedAsGEMV( // Apply the clamping from the activation function res8 = vmax_u8(res8, vdup_n_u8(output_activation_min)); res8 = vmin_u8(res8, vdup_n_u8(output_activation_max)); - // Store results to destination. Assumes 32bit alignment. - vst1_lane_u32(reinterpret_cast(output_ptr), - vreinterpret_u32_u8(res8), 0); - output_ptr += kPeel; + // Store results to destination. + vst1_lane_u8(output_data + out + 0, res8, 0); + vst1_lane_u8(output_data + out + 1, res8, 1); + vst1_lane_u8(output_data + out + 2, res8, 2); + vst1_lane_u8(output_data + out + 3, res8, 3); } } #endif // USE_NEON @@ -1054,14 +1054,16 @@ inline void FullyConnected( const int filter_dim_count = filter_shape.DimensionsCount(); const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); #ifdef USE_NEON - const int output_size = MatchingDim(filter_shape, filter_dim_count - 2, - output_shape, output_dim_count - 1); - if (batches == 1 && !(output_size % 4)) { - return FullyConnectedAsGEMV( - input_shape, input_data, input_offset, filter_shape, filter_data, - filter_offset, bias_shape, bias_data, output_offset, output_multiplier, - output_shift, output_activation_min, output_activation_max, - output_shape, output_data); + if (batches == 1) { + const int output_size = MatchingDim(filter_shape, filter_dim_count - 2, + output_shape, output_dim_count - 1); + if (output_size >= 4) { + return FullyConnectedAsGEMV( + input_shape, input_data, input_offset, filter_shape, filter_data, + filter_offset, bias_shape, bias_data, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_shape, output_data); + } } #endif // USE_NEON const int filter_rows = 
filter_shape.Dims(filter_dim_count - 2); @@ -2084,6 +2086,21 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, TFLITE_DCHECK_EQ(output_cols, gemm_input_cols); TFLITE_DCHECK_EQ(filter_cols, gemm_input_rows); TFLITE_DCHECK_EQ(bias_shape.FlatSize(), output_rows); + +#ifdef USE_NEON + if (gemm_input_cols == 1 && output_rows >= 4) { + RuntimeShape fc_filter_shape{ + filter_shape.Dims(0), + filter_shape.Dims(filter_shape.DimensionsCount() - 1)}; + + return FullyConnectedAsGEMV( + *gemm_input_shape, gemm_input_data, input_offset, fc_filter_shape, + filter_data, filter_offset, bias_shape, bias_data, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_shape, output_data); + } +#endif + gemmlowp::MatrixMap filter_matrix( filter_data, filter_rows, filter_cols); gemmlowp::MatrixMap input_matrix( -- GitLab From 44985cbb76ad543282a19ffcf1d764e54e792ed9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 09:00:18 -0800 Subject: [PATCH 204/351] Internal change. 
PiperOrigin-RevId: 234153216 --- .../contrib/makefile/proto_text_cc_files.txt | 2 +- tensorflow/core/BUILD | 9 +- .../core/framework/allocator_registry.h | 1 + .../core/platform/default/build_config.bzl | 8 +- .../platform/default/human_readable_json.cc | 2 +- tensorflow/core/platform/default/protobuf.h | 47 ----- .../core/platform/default/protobuf_compiler.h | 25 --- .../core/platform/default/string_coding.cc | 30 --- .../core/platform/default/string_coding.h | 98 ---------- .../core/platform/{default => }/protobuf.cc | 8 +- tensorflow/core/platform/protobuf.h | 39 +++- tensorflow/core/platform/protobuf_compiler.h | 6 +- tensorflow/core/platform/tensor_coding.cc | 175 ++++++++++++++++++ tensorflow/core/platform/tensor_coding.h | 69 ++++++- 14 files changed, 285 insertions(+), 234 deletions(-) delete mode 100644 tensorflow/core/platform/default/protobuf.h delete mode 100644 tensorflow/core/platform/default/protobuf_compiler.h delete mode 100644 tensorflow/core/platform/default/string_coding.cc delete mode 100644 tensorflow/core/platform/default/string_coding.h rename tensorflow/core/platform/{default => }/protobuf.cc (72%) diff --git a/tensorflow/contrib/makefile/proto_text_cc_files.txt b/tensorflow/contrib/makefile/proto_text_cc_files.txt index 9ea94c7433..0a0ba36232 100644 --- a/tensorflow/contrib/makefile/proto_text_cc_files.txt +++ b/tensorflow/contrib/makefile/proto_text_cc_files.txt @@ -40,7 +40,6 @@ tensorflow/core/lib/wav/wav_io.cc tensorflow/core/platform/cpu_info.cc tensorflow/core/platform/default/logging.cc tensorflow/core/platform/default/mutex.cc -tensorflow/core/platform/default/protobuf.cc tensorflow/core/platform/default/tracing.cc tensorflow/core/platform/denormal.cc tensorflow/core/platform/env.cc @@ -53,6 +52,7 @@ tensorflow/core/platform/posix/error.cc tensorflow/core/platform/posix/load_library.cc tensorflow/core/platform/posix/port.cc tensorflow/core/platform/posix/posix_file_system.cc +tensorflow/core/platform/protobuf.cc 
tensorflow/core/platform/protobuf_util.cc tensorflow/core/platform/setround.cc tensorflow/core/platform/tensor_coding.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index fb93e8ddd3..fd4026e99c 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -128,7 +128,6 @@ load( "tf_additional_libdevice_srcs", "tf_additional_minimal_lib_srcs", "tf_additional_mpi_lib_defines", - "tf_additional_proto_compiler_hdrs", "tf_additional_proto_hdrs", "tf_additional_proto_srcs", "tf_additional_test_deps", @@ -418,9 +417,8 @@ cc_library( name = "platform_protobuf", srcs = tf_platform_hdrs([ "protobuf.h", - ]) + tf_platform_srcs([ - "protobuf.cc", ]) + [ + "platform/protobuf.cc", "platform/protobuf_util.cc", "lib/core/status.h", ], @@ -664,7 +662,7 @@ cc_library( name = "lib_proto_compiler", hdrs = [ "platform/protobuf_compiler.h", - ] + tf_additional_proto_compiler_hdrs(), + ], copts = tf_copts(), deps = tf_lib_proto_compiler_deps() + [ ":lib_proto_parsing", @@ -1049,13 +1047,13 @@ cc_library( "platform/default/integral_types.h", "platform/default/logging.h", "platform/default/mutex.h", - "platform/default/protobuf.h", "platform/default/thread_annotations.h", "platform/dynamic_annotations.h", "platform/macros.h", "platform/mutex.h", "platform/platform.h", "platform/prefetch.h", + "platform/protobuf.h", "platform/thread_annotations.h", "platform/types.h", "platform/cpu_info.h", @@ -2317,6 +2315,7 @@ cc_library( "platform/**/logging.cc", "platform/**/human_readable_json.cc", "platform/abi.cc", + "platform/protobuf.cc", ], ) + tf_additional_lib_srcs( exclude = [ diff --git a/tensorflow/core/framework/allocator_registry.h b/tensorflow/core/framework/allocator_registry.h index 9dc74345da..d9f3280c62 100644 --- a/tensorflow/core/framework/allocator_registry.h +++ b/tensorflow/core/framework/allocator_registry.h @@ -21,6 +21,7 @@ limitations under the License. 
#include #include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/numa.h" diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 1d18cc50b5..1cd83bbcdd 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -530,19 +530,13 @@ def tf_additional_proto_hdrs(): return [ "platform/default/integral_types.h", "platform/default/logging.h", - "platform/default/protobuf.h", ] + if_windows([ "platform/windows/integral_types.h", ]) -def tf_additional_proto_compiler_hdrs(): - return [ - "platform/default/protobuf_compiler.h", - ] - def tf_additional_proto_srcs(): return [ - "platform/default/protobuf.cc", + "platform/protobuf.cc", ] def tf_additional_human_readable_json_deps(): diff --git a/tensorflow/core/platform/default/human_readable_json.cc b/tensorflow/core/platform/default/human_readable_json.cc index bf9c7b7620..977ff1272e 100644 --- a/tensorflow/core/platform/default/human_readable_json.cc +++ b/tensorflow/core/platform/default/human_readable_json.cc @@ -46,7 +46,7 @@ Status HumanReadableJsonToProto(const string& str, protobuf::Message* proto) { return errors::Internal("Cannot parse JSON protos on Android"); #else proto->Clear(); - auto status = google::protobuf::util::JsonStringToMessage(str, proto); + auto status = protobuf::util::JsonStringToMessage(str, proto); if (!status.ok()) { // Convert error_msg google::protobuf::StringPiece to // tensorflow::StringPiece. diff --git a/tensorflow/core/platform/default/protobuf.h b/tensorflow/core/platform/default/protobuf.h deleted file mode 100644 index aeef2d9b88..0000000000 --- a/tensorflow/core/platform/default/protobuf.h +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_H_ -#define TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_H_ - -// IWYU pragma: private, include "third_party/tensorflow/core/platform/protobuf.h" -// IWYU pragma: friend third_party/tensorflow/core/platform/protobuf.h - -#ifndef TENSORFLOW_LITE_PROTOS -#include "google/protobuf/descriptor.h" -#include "google/protobuf/descriptor.pb.h" -#include "google/protobuf/dynamic_message.h" -#include "google/protobuf/io/tokenizer.h" -#include "google/protobuf/text_format.h" -#include "google/protobuf/util/json_util.h" -#include "google/protobuf/util/type_resolver_util.h" -#endif - -#include "google/protobuf/arena.h" -#include "google/protobuf/io/coded_stream.h" -#include "google/protobuf/io/zero_copy_stream.h" -#include "google/protobuf/io/zero_copy_stream_impl_lite.h" -#include "google/protobuf/map.h" -#include "google/protobuf/repeated_field.h" - -namespace tensorflow { -namespace protobuf = ::google::protobuf; -using protobuf_int64 = ::google::protobuf::int64; -using protobuf_uint64 = ::google::protobuf::uint64; -extern const char* kProtobufInt64Typename; -extern const char* kProtobufUint64Typename; -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_H_ diff --git a/tensorflow/core/platform/default/protobuf_compiler.h b/tensorflow/core/platform/default/protobuf_compiler.h deleted file mode 
100644 index a93d7a184b..0000000000 --- a/tensorflow/core/platform/default/protobuf_compiler.h +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_COMPILER_H_ -#define TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_COMPILER_H_ - -// IWYU pragma: private, include "third_party/tensorflow/core/platform/protobuf_compiler.h" -// IWYU pragma: friend third_party/tensorflow/core/platform/protobuf_compiler.h - -#include "google/protobuf/compiler/importer.h" -#include "tensorflow/core/platform/default/protobuf.h" - -#endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_PROTOBUF_H_ diff --git a/tensorflow/core/platform/default/string_coding.cc b/tensorflow/core/platform/default/string_coding.cc deleted file mode 100644 index 7410ee6782..0000000000 --- a/tensorflow/core/platform/default/string_coding.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/platform/default/string_coding.h" - -namespace tensorflow { -namespace port { - -std::unique_ptr NewStringListEncoder(string* out) { - return std::unique_ptr(new StringListEncoder(out)); -} - -std::unique_ptr NewStringListDecoder(const string& in) { - return std::unique_ptr(new StringListDecoder(in)); -} - -} // namespace port -} // namespace tensorflow diff --git a/tensorflow/core/platform/default/string_coding.h b/tensorflow/core/platform/default/string_coding.h deleted file mode 100644 index 70b8ab0144..0000000000 --- a/tensorflow/core/platform/default/string_coding.h +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ -#ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_STRING_CODING_H_ -#define TENSORFLOW_CORE_PLATFORM_DEFAULT_STRING_CODING_H_ - -// IWYU pragma: private, include "third_party/tensorflow/core/platform/tensor_coding.h" -// IWYU pragma: friend third_party/tensorflow/core/platform/tensor_coding.h - -#include "tensorflow/core/lib/core/coding.h" -#include "tensorflow/core/lib/strings/strcat.h" -#include "tensorflow/core/platform/protobuf.h" -#include "tensorflow/core/platform/types.h" - -namespace tensorflow { -namespace port { - -// Encodes sequences of strings and serialized protocol buffers into a string. -// Normal usage consists of zero or more calls to Append() and a single call to -// Finalize(). -class StringListEncoder { - public: - explicit StringListEncoder(string* out) : out_(out) {} - - // Encodes the given protocol buffer. This may not be called after Finalize(). - void Append(const protobuf::MessageLite& m) { - core::PutVarint32(out_, m.ByteSize()); - m.AppendToString(&rest_); - } - - // Encodes the given string. This may not be called after Finalize(). - void Append(const string& s) { - core::PutVarint32(out_, s.length()); - strings::StrAppend(&rest_, s); - } - - // Signals end of the encoding process. No other calls are allowed after this. - void Finalize() { strings::StrAppend(out_, rest_); } - - private: - string* out_; - string rest_; -}; - -// Decodes a string into sequences of strings (which may represent serialized -// protocol buffers). Normal usage involves a single call to ReadSizes() in -// order to retrieve the length of all the strings in the sequence. For each -// size returned a call to Data() is expected and will return the actual -// string. -class StringListDecoder { - public: - explicit StringListDecoder(const string& in) : reader_(in) {} - - // Populates the given vector with the lengths of each string in the sequence - // being decoded. 
Upon returning the vector is guaranteed to contain as many - // elements as there are strings in the sequence. - bool ReadSizes(std::vector* sizes) { - int64 total = 0; - for (auto& size : *sizes) { - if (!core::GetVarint32(&reader_, &size)) return false; - total += size; - } - if (total != static_cast(reader_.size())) { - return false; - } - return true; - } - - // Returns a pointer to the next string in the sequence, then prepares for the - // next call by advancing 'size' characters in the sequence. - const char* Data(uint32 size) { - const char* data = reader_.data(); - reader_.remove_prefix(size); - return data; - } - - private: - StringPiece reader_; -}; - -std::unique_ptr NewStringListEncoder(string* out); -std::unique_ptr NewStringListDecoder(const string& in); - -} // namespace port -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_PLATFORM_DEFAULT_STRING_CODING_H_ diff --git a/tensorflow/core/platform/default/protobuf.cc b/tensorflow/core/platform/protobuf.cc similarity index 72% rename from tensorflow/core/platform/default/protobuf.cc rename to tensorflow/core/platform/protobuf.cc index 548d5834e6..c9e6f3bf5c 100644 --- a/tensorflow/core/platform/default/protobuf.cc +++ b/tensorflow/core/platform/protobuf.cc @@ -1,4 +1,4 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/platform/default/protobuf.h" +#include "tensorflow/core/platform/protobuf.h" namespace tensorflow { -const char* kProtobufInt64Typename = "::google::protobuf::int64"; -const char* kProtobufUint64Typename = "::google::protobuf::uint64"; +const char* kProtobufInt64Typename = "::tensorflow::protobuf_int64"; +const char* kProtobufUint64Typename = "::tensorflow::protobuf_uint64"; } // namespace tensorflow diff --git a/tensorflow/core/platform/protobuf.h b/tensorflow/core/platform/protobuf.h index fcbf1fc8c5..59f4129adf 100644 --- a/tensorflow/core/platform/protobuf.h +++ b/tensorflow/core/platform/protobuf.h @@ -25,13 +25,31 @@ limitations under the License. // TensorFlow code should use the ::tensorflow::protobuf namespace to // refer to all protobuf APIs. -#if defined(PLATFORM_GOOGLE) && !defined(USE_DEFAULT_PROTOBUF) -#include "tensorflow/core/platform/google/protobuf.h" -#else -#include "tensorflow/core/platform/default/protobuf.h" +#ifndef TENSORFLOW_LITE_PROTOS +#include "google/protobuf/io/tokenizer.h" +#include "google/protobuf/descriptor.pb.h" +#include "google/protobuf/descriptor.h" +#include "google/protobuf/dynamic_message.h" +#include "google/protobuf/text_format.h" +#include "google/protobuf/util/json_util.h" +#include "google/protobuf/util/type_resolver_util.h" #endif +#include "google/protobuf/io/coded_stream.h" +#include "google/protobuf/io/zero_copy_stream.h" +#include "google/protobuf/io/zero_copy_stream_impl_lite.h" +#include "google/protobuf/arena.h" +#include "google/protobuf/map.h" +#include "google/protobuf/repeated_field.h" + namespace tensorflow { + +namespace protobuf = ::google::protobuf; +using protobuf_int64 = ::google::protobuf::int64; +using protobuf_uint64 = ::google::protobuf::uint64; +extern const char* kProtobufInt64Typename; +extern const char* kProtobufUint64Typename; + // Parses a protocol buffer contained in a string in the 
binary wire format. // Returns true on success. Note: Unlike protobuf's builtin ParseFromString, // this function has no size restrictions on the total size of the encoded @@ -47,8 +65,19 @@ inline const string& ProtobufStringToString(const string& s) { return s; } // Set to . Swapping is allowed, as does not need to be // preserved. inline void SetProtobufStringSwapAllowed(string* src, string* dest) { - dest->swap(*src); + *dest = std::move(*src); +} + +#if defined(TENSORFLOW_PROTOBUF_USES_CORD) +// These versions of ProtobufStringToString and SetProtobufString get used by +// tools/proto_text's generated code. They have the same name as the versions +// in core/platform/protobuf.h, so the generation code doesn't need to determine +// if the type is Cord or string at generation time. +inline string ProtobufStringToString(const Cord& s) { return s.ToString(); } +inline void SetProtobufStringSwapAllowed(string* src, Cord* dest) { + dest->CopyFrom(*src); } +#endif // defined(TENSORFLOW_PROTOBUF_USES_CORD) } // namespace tensorflow diff --git a/tensorflow/core/platform/protobuf_compiler.h b/tensorflow/core/platform/protobuf_compiler.h index 29679e0089..916637d13a 100644 --- a/tensorflow/core/platform/protobuf_compiler.h +++ b/tensorflow/core/platform/protobuf_compiler.h @@ -16,10 +16,6 @@ limitations under the License. 
#ifndef TENSORFLOW_PLATFORM_PROTOBUF_COMPILER_H_ #define TENSORFLOW_PLATFORM_PROTOBUF_COMPILER_H_ -#if defined(PLATFORM_GOOGLE) && !defined(USE_DEFAULT_PROTOBUF) -#include "tensorflow/core/platform/google/protobuf_compiler.h" -#else -#include "tensorflow/core/platform/default/protobuf_compiler.h" -#endif +#include "google/protobuf/compiler/importer.h" #endif // TENSORFLOW_PLATFORM_PROTOBUF_COMPILER_H_ diff --git a/tensorflow/core/platform/tensor_coding.cc b/tensorflow/core/platform/tensor_coding.cc index 84601de39a..3280802bac 100644 --- a/tensorflow/core/platform/tensor_coding.cc +++ b/tensorflow/core/platform/tensor_coding.cc @@ -19,6 +19,12 @@ limitations under the License. #include "tensorflow/core/lib/core/coding.h" #include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/protobuf.h" + +#if defined(TENSORFLOW_PROTOBUF_USES_CORD) +#include "strings/cord_varint.h" +#endif // defined(TENSORFLOW_PROTOBUF_USES_CORD) namespace tensorflow { namespace port { @@ -66,5 +72,174 @@ void CopyFromArray(string* s, const char* base, size_t bytes) { s->assign(base, bytes); } +class StringListEncoderImpl : public StringListEncoder { + public: + explicit StringListEncoderImpl(string* out) : out_(out) {} + ~StringListEncoderImpl() override = default; + + void Append(const protobuf::MessageLite& m) override { + core::PutVarint32(out_, m.ByteSizeLong()); + tensorflow::string serialized_message; + m.AppendToString(&serialized_message); + strings::StrAppend(&rest_, serialized_message); + } + + void Append(const string& s) override { + core::PutVarint32(out_, s.length()); + strings::StrAppend(&rest_, s); + } + + void Finalize() override { strings::StrAppend(out_, rest_); } + + private: + string* out_; + string rest_; +}; + +class StringListDecoderImpl : public StringListDecoder { + public: + explicit StringListDecoderImpl(const string& in) : reader_(in) {} + ~StringListDecoderImpl() override = default; + + 
bool ReadSizes(std::vector* sizes) override { + int64 total = 0; + for (auto& size : *sizes) { + if (!core::GetVarint32(&reader_, &size)) return false; + total += size; + } + if (total != static_cast(reader_.size())) { + return false; + } + return true; + } + + const char* Data(uint32 size) override { + const char* data = reader_.data(); + reader_.remove_prefix(size); + return data; + } + + private: + StringPiece reader_; +}; + +std::unique_ptr NewStringListEncoder(string* out) { + return std::unique_ptr(new StringListEncoderImpl(out)); +} + +std::unique_ptr NewStringListDecoder(const string& in) { + return std::unique_ptr(new StringListDecoderImpl(in)); +} + +#if defined(TENSORFLOW_PROTOBUF_USES_CORD) +void AssignRefCounted(StringPiece src, core::RefCounted* obj, Cord* out) { + obj->Ref(); + out->Clear(); + // Defines a lambda to unref "obj" when Cord deletes this piece of + // memory. +[] converts the lambda to a C style function pointer. + auto cleanup = +[](absl::string_view donotcare, void* obj) { + reinterpret_cast(obj)->Unref(); + }; + out->AppendExternalMemory(absl::string_view(src.data(), src.size()), obj, + cleanup); +} + +void EncodeStringList(const string* strings, int64 n, Cord* out) { + out->Clear(); + for (int i = 0; i < n; ++i) { + ::strings::CordAppendVarint(strings[i].size(), out); + } + for (int i = 0; i < n; ++i) { + out->Append(strings[i]); + } +} + +bool DecodeStringList(const Cord& src, string* strings, int64 n) { + std::vector sizes(n); + CordReader reader(src); + int64 tot = 0; + for (auto& v : sizes) { + if (!::strings::CordReaderReadVarint(&reader, &v)) return false; + tot += v; + } + if (tot != reader.Available()) { + return false; + } + string* data = strings; + for (int i = 0; i < n; ++i, ++data) { + auto size = sizes[i]; + if (size > reader.Available()) { + return false; + } + gtl::STLStringResizeUninitialized(data, size); + reader.ReadN(size, gtl::string_as_array(data)); + } + return true; +} + +void CopyFromArray(Cord* c, const 
char* base, size_t bytes) { + c->CopyFrom(base, bytes); +} + +class CordStringListEncoderImpl : public StringListEncoder { + public: + explicit CordStringListEncoderImpl(Cord* out) : out_(out) {} + ~CordStringListEncoderImpl() override = default; + + void Append(const protobuf::MessageLite& m) override { + ::strings::CordAppendVarint(m.ByteSizeLong(), out_); + m.AppendToString(&rest_); + } + + void Append(const string& s) override { + ::strings::CordAppendVarint(s.length(), out_); + rest_.append(s.data(), s.size()); + } + + void Finalize() override { out_->Append(rest_); } + + private: + Cord* out_; + string rest_; +}; + +class CordStringListDecoderImpl : public StringListDecoder { + public: + explicit CordStringListDecoderImpl(const Cord& in) : reader_(in) {} + ~CordStringListDecoderImpl() override = default; + + bool ReadSizes(std::vector* sizes) override { + int64 total = 0; + for (auto& size : *sizes) { + if (!::strings::CordReaderReadVarint(&reader_, &size)) return false; + total += size; + } + if (total != static_cast(reader_.Available())) { + return false; + } + return true; + } + + const char* Data(uint32 size) override { + tmp_.resize(size); + reader_.ReadN(size, tmp_.data()); + return tmp_.data(); + } + + private: + CordReader reader_; + std::vector tmp_; +}; + +std::unique_ptr NewStringListEncoder(Cord* out) { + return std::unique_ptr(new CordStringListEncoderImpl(out)); +} + +std::unique_ptr NewStringListDecoder(const Cord& in) { + return std::unique_ptr(new CordStringListDecoderImpl(in)); +} + +#endif // defined(TENSORFLOW_PROTOBUF_USES_CORD) + } // namespace port } // namespace tensorflow diff --git a/tensorflow/core/platform/tensor_coding.h b/tensorflow/core/platform/tensor_coding.h index 6c6d75830d..7f6075df9d 100644 --- a/tensorflow/core/platform/tensor_coding.h +++ b/tensorflow/core/platform/tensor_coding.h @@ -21,14 +21,9 @@ limitations under the License. 
#include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/platform.h" +#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" -#ifdef PLATFORM_GOOGLE -#include "tensorflow/core/platform/google/cord_coding.h" -#else -#include "tensorflow/core/platform/default/string_coding.h" -#endif - namespace tensorflow { namespace port { @@ -52,6 +47,68 @@ bool DecodeStringList(const string& src, string* strings, int64 n); // Assigns base[0..bytes-1] to *s void CopyFromArray(string* s, const char* base, size_t bytes); +// Encodes sequences of strings and serialized protocol buffers into a string. +// Normal usage consists of zero or more calls to Append() and a single call to +// Finalize(). +class StringListEncoder { + public: + virtual ~StringListEncoder() = default; + + // Encodes the given protocol buffer. This may not be called after Finalize(). + virtual void Append(const protobuf::MessageLite& m) = 0; + + // Encodes the given string. This may not be called after Finalize(). + virtual void Append(const string& s) = 0; + + // Signals end of the encoding process. No other calls are allowed after this. + virtual void Finalize() = 0; +}; + +// Decodes a string into sequences of strings (which may represent serialized +// protocol buffers). Normal usage involves a single call to ReadSizes() in +// order to retrieve the length of all the strings in the sequence. For each +// size returned a call to Data() is expected and will return the actual +// string. +class StringListDecoder { + public: + virtual ~StringListDecoder() = default; + + // Populates the given vector with the lengths of each string in the sequence + // being decoded. Upon returning the vector is guaranteed to contain as many + // elements as there are strings in the sequence. 
+ virtual bool ReadSizes(std::vector* sizes) = 0; + + // Returns a pointer to the next string in the sequence, then prepares for the + // next call by advancing 'size' characters in the sequence. + virtual const char* Data(uint32 size) = 0; +}; + +std::unique_ptr NewStringListEncoder(string* out); +std::unique_ptr NewStringListDecoder(const string& in); + +#if defined(TENSORFLOW_PROTOBUF_USES_CORD) +// Store src contents in *out. If backing memory for src is shared with *out, +// will ref obj during the call and will arrange to unref obj when no +// longer needed. +void AssignRefCounted(StringPiece src, core::RefCounted* obj, Cord* out); + +// TODO(kmensah): Macro guard this with a check for Cord support. +inline void CopyToArray(const Cord& src, char* dst) { src.CopyToArray(dst); } + +// Store encoding of strings[0..n-1] in *out. +void EncodeStringList(const string* strings, int64 n, Cord* out); + +// Decode n strings from src and store in strings[0..n-1]. +// Returns true if successful, false on parse error. +bool DecodeStringList(const Cord& src, string* strings, int64 n); + +// Assigns base[0..bytes-1] to *c +void CopyFromArray(Cord* c, const char* base, size_t bytes); + +std::unique_ptr NewStringListEncoder(Cord* out); +std::unique_ptr NewStringListDecoder(const Cord& in); +#endif // defined(TENSORFLOW_PROTOBUF_USES_CORD) + } // namespace port } // namespace tensorflow -- GitLab From 72d6e6993d3ee430d4a41fc92e2804a1c597dc5f Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Fri, 15 Feb 2019 09:36:39 -0800 Subject: [PATCH 205/351] [XLA] Fix TrySimplifyScalarSlice() Correctly check whether a scalar slice is in the padding. How embarrassing. 
PiperOrigin-RevId: 234159089 --- .../xla/service/algebraic_simplifier.cc | 6 +-- .../xla/service/algebraic_simplifier_test.cc | 46 ++++++++++++++++++- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 9b037960cd..bd17e96106 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2675,11 +2675,11 @@ StatusOr AlgebraicSimplifierVisitor::TrySimplifyScalarSlice( int64 start = slice->slice_starts(i); int64 low = padding_config.dimensions(i).edge_padding_low(); int64 data = pad->operand(0)->shape().dimensions(i); - if (start >= low && start < low + data) { - return false; + if (start < low || start >= low + data) { + return true; } } - return true; + return false; }(); if (in_padding) { diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index d959fafc0c..af03fcb100 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -3960,7 +3960,7 @@ TEST_F(AlgebraicSimplifierTest, SliceOfPadMidNonScalar) { param = f32[3,4] parameter(0) constant = f32[] constant(0.0) pad = f32[8,10] pad(f32[3,4] param, f32[] constant), padding=3_2x1_5 - ROOT slice = f32[1,1] slice(f32[8,10] pad), slice={[5:6],[9:10]} + ROOT slice = f32[1,1] slice(f32[8,10] pad), slice={[5:6],[4:5]} } )"; TF_ASSERT_OK_AND_ASSIGN(auto module, @@ -3971,6 +3971,27 @@ TEST_F(AlgebraicSimplifierTest, SliceOfPadMidNonScalar) { EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); } +TEST_F(AlgebraicSimplifierTest, SliceOfPadMidScalarConstant) { + const char* hlo_string = R"( + HloModule module + + ENTRY test { + param = f32[3,4] parameter(0) + constant = f32[] constant(0.0) + pad = f32[8,10] pad(f32[3,4] param, f32[] constant), 
padding=3_2x1_5 + ROOT slice = f32[1,1] slice(f32[8,10] pad), slice={[5:6],[9:10]} + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + AlgebraicSimplifierOptions options; + AlgebraicSimplifier simplifier(options); + EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::Reshape(m::Constant()))); +} + TEST_F(AlgebraicSimplifierTest, SliceOfPadMidScalar) { const char* hlo_string = R"( HloModule module @@ -3992,6 +4013,29 @@ TEST_F(AlgebraicSimplifierTest, SliceOfPadMidScalar) { EXPECT_THAT(root, GmockMatch(m::Parameter())); } +TEST_F(AlgebraicSimplifierTest, SliceOfPadSomeDimsInPadding) { + const char* hlo_string = R"( + HloModule module + + ENTRY entry () -> f32[1]{0} { + constant.val = f32[] constant(4) + constant.pad = f32[] constant(-7) + reshape.1 = f32[1,1,1]{2,1,0} reshape(f32[] constant.val) + pad = f32[3,3,3]{2,1,0} pad(f32[1,1,1]{2,1,0} reshape.1, f32[] constant.pad), padding=0_2x0_2x2_0 + slice = f32[1,1,1]{2,1,0} slice(f32[3,3,3]{2,1,0} pad), slice={[0:1], [0:1], [0:1]} + ROOT reshape.2 = f32[1]{0} reshape(f32[1,1,1]{2,1,0} slice) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + AlgebraicSimplifierOptions options; + AlgebraicSimplifier simplifier(options); + EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::Reshape(m::ConstantScalar(-7.0)))); +} + TEST_F(AlgebraicSimplifierTest, SliceOfConcatScalarInput) { const char* hlo_string = R"( HloModule module -- GitLab From 1d15f5a20558dff73fd5f8c2d7191b457540a350 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 09:37:20 -0800 Subject: [PATCH 206/351] Fix a typo for "graph_util.tensor_shape_from_node_def_name" deprecation. 
PiperOrigin-RevId: 234159232 --- tensorflow/python/framework/graph_util_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/graph_util_impl.py b/tensorflow/python/framework/graph_util_impl.py index 50cdb7a15d..103c611163 100644 --- a/tensorflow/python/framework/graph_util_impl.py +++ b/tensorflow/python/framework/graph_util_impl.py @@ -197,7 +197,7 @@ def extract_sub_graph(graph_def, dest_nodes): @deprecation.deprecated( date=None, - instructions="Use tf.compat.v1.graph_util.remove_training_nodes") + instructions="Use tf.compat.v1.graph_util.tensor_shape_from_node_def_name") @tf_export(v1=["graph_util.tensor_shape_from_node_def_name"]) def tensor_shape_from_node_def_name(graph, input_name): """Convenience function to get a shape from a NodeDef's input string.""" -- GitLab From fcefeaf6e8c69a6b596d278df3bc9d1591e52c47 Mon Sep 17 00:00:00 2001 From: Andy Ly Date: Fri, 15 Feb 2019 09:42:15 -0800 Subject: [PATCH 207/351] [Grappler] Float/Double bit comparison when packing tensors. 
PiperOrigin-RevId: 234160134 --- .../core/grappler/optimizers/constant_folding.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index cf495eecf5..206a9b2eef 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -173,18 +173,18 @@ bool HasTPUAttributes(const NodeDef& node) { } template -bool IsDenormal(T x) { - return false; +bool PackedValuesNotEqual(T a, T b) { + return a != b; } template <> -bool IsDenormal(float x) { - return !std::isnormal(x); +bool PackedValuesNotEqual(float a, float b) { + return reinterpret_cast(a) != reinterpret_cast(b); } template <> -bool IsDenormal(double x) { - return !std::isnormal(x); +bool PackedValuesNotEqual(double a, double b) { + return reinterpret_cast(a) != reinterpret_cast(b); } float QuantizedTypeMinAsFloat(DataType data_type) { @@ -1076,7 +1076,7 @@ Status ConstantFolding::CreateNodeDef(const string& name, int64 last_index = 0; \ for (int64 i = 0; i < tensor->NumElements(); ++i) { \ TYPE cur = *val_ptr++; \ - if (cur != last || IsDenormal(cur)) { \ + if (PackedValuesNotEqual(cur, last)) { \ last = cur; \ last_index = i; \ } \ -- GitLab From bb58e4aab40d0106881aec49cb3af5f5d7cffa2c Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 15 Feb 2019 09:49:56 -0800 Subject: [PATCH 208/351] Improve performance of small scalar reductions on GPU PiperOrigin-RevId: 234161360 --- tensorflow/core/kernels/reduction_gpu_kernels.cu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index ffa41ece49..e2d8b97cb2 100644 --- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h +++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h @@ -500,7 +500,7 @@ void LaunchScalarReduction(OpKernelContext* ctx, OUT_T out, IN_T in, BlockReduceKernel <<>>(in, out, in_size, op, init); return; - } else if (in_size <= 1 << 19) { + } else if (in_size <= 1 << 18) { const int num_threads = 256; const int num_blocks = std::min(32, Eigen::divup(in_size, num_threads)); // it seems like tailoring this to the GPU -- GitLab From 637db0e2511ff60d1556923484d499ffeba5c5af Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Fri, 15 Feb 2019 09:58:47 -0800 Subject: [PATCH 209/351] Another struct initialization fix for Windows build. 
PiperOrigin-RevId: 234162813 --- .../lite/tools/optimize/subgraph_quantizer.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/lite/tools/optimize/subgraph_quantizer.cc b/tensorflow/lite/tools/optimize/subgraph_quantizer.cc index c1ff444d78..05115e8775 100644 --- a/tensorflow/lite/tools/optimize/subgraph_quantizer.cc +++ b/tensorflow/lite/tools/optimize/subgraph_quantizer.cc @@ -67,17 +67,17 @@ struct OpWithBiasTensors { const OpWithBiasTensors* GetInfoForOpWithBiasTensor(BuiltinOperator op_code) { if (op_code == BuiltinOperator_CONV_2D) { - static OpWithBiasTensors op_info = {.activation_input_index = 0, - .weights_input_index = 1, - .bias_input_index = 2, - .index_for_channel_in_weights = 0}; + static OpWithBiasTensors op_info = {/* activation_input_index */ 0, + /* weights_input_index */ 1, + /* bias_input_index */ 2, + /* index_for_channel_in_weights */ 0}; return &op_info; } if (op_code == BuiltinOperator_DEPTHWISE_CONV_2D) { - static OpWithBiasTensors op_info = {.activation_input_index = 0, - .weights_input_index = 1, - .bias_input_index = 2, - .index_for_channel_in_weights = 3}; + static OpWithBiasTensors op_info = {/* bias_input_index */ 0, + /* bias_input_index */ 1, + /* bias_input_index */ 2, + /* index_for_channel_in_weights */ 3}; return &op_info; } -- GitLab From 12c5e6c4ce6e22d5407abf7bb7c42748c313c5a5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 10:10:32 -0800 Subject: [PATCH 210/351] For negative dimension size of a tile, print "Invalid value xxx" instead of crashing. 
PiperOrigin-RevId: 234165280 --- tensorflow/compiler/xla/layout.cc | 10 +++++----- tensorflow/compiler/xla/layout_test.cc | 3 +++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/layout.cc b/tensorflow/compiler/xla/layout.cc index d2f7985aab..91641c244b 100644 --- a/tensorflow/compiler/xla/layout.cc +++ b/tensorflow/compiler/xla/layout.cc @@ -35,11 +35,11 @@ string Tile::ToString() const { if (dim >= 0) { elements.push_back(std::to_string(dim)); } else { - CHECK_EQ(dim, kCombineDimension) - << "Tile dimension size needs to be mininum int64 value if it's " - "negative. Value is " - << dim; - elements.push_back("*"); + if (dim == kCombineDimension) { + elements.push_back("*"); + } else { + elements.push_back(absl::StrCat("Invalid value ", dim)); + } } } return absl::StrCat("(", absl::StrJoin(elements, ","), ")"); diff --git a/tensorflow/compiler/xla/layout_test.cc b/tensorflow/compiler/xla/layout_test.cc index 7d43b0b87c..e6465966d4 100644 --- a/tensorflow/compiler/xla/layout_test.cc +++ b/tensorflow/compiler/xla/layout_test.cc @@ -42,6 +42,9 @@ TEST_F(LayoutTest, ToString) { EXPECT_EQ( Layout({1, 0}, {Tile({2, 55})}).set_element_size_in_bits(42).ToString(), "{1,0:T(2,55)E(42)}"); + EXPECT_EQ( + Layout({1, 0}, {Tile({-2, 55})}).set_element_size_in_bits(42).ToString(), + "{1,0:T(Invalid value -2,55)E(42)}"); } TEST_F(LayoutTest, StreamOut) { -- GitLab From 896ad1053b9045e76a4e9e5ab553ee311df44d1f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 10:16:46 -0800 Subject: [PATCH 211/351] Handle list inputs correctly in AddEagerOpToGraph API. 
PiperOrigin-RevId: 234166416 --- tensorflow/c/c_api_experimental.cc | 33 +++++++++++++++++++++---- tensorflow/c/c_api_experimental_test.cc | 24 ++++++++++++++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index a8325ce494..7ff4084dec 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -9064,11 +9064,6 @@ TF_Operation* TFE_AddEagerOpToGraph(TFE_Op* op, TFE_TraceContext* trace_ctx, tensorflow::strings::StrCat(op_type, "_", trace_ctx->node_counter++); auto* desc = TF_NewOperation(trace_ctx->graph, op_type.c_str(), op_name.c_str()); - for (auto* input : op->operation.Inputs()) { - auto symbolic_input = getOrCreateSymbolicTensor(trace_ctx, input, status); - if (!status->status.ok()) return nullptr; - TF_AddInput(desc, symbolic_input); - } VLOG(1) << "Adding attrs."; tensorflow::AttrValueMap attrs; @@ -9077,6 +9072,34 @@ TF_Operation* TFE_AddEagerOpToGraph(TFE_Op* op, TFE_TraceContext* trace_ctx, desc->node_builder.Attr(attr.first, attr.second); } + VLOG(1) << "Adding inputs."; + const auto& inputs = op->operation.Inputs(); + size_t inputIndex = 0; + const tensorflow::OpDef& op_def = desc->node_builder.op_def(); + for (const tensorflow::OpDef::ArgDef& input_arg : op_def.input_arg()) { + // TODO(bgogul): Add support for number attributes. 
+ DCHECK(input_arg.number_attr().empty()) + << "Number attributes is not implemented yet."; + if (input_arg.type_list_attr().empty()) { + auto symbolic_input = + getOrCreateSymbolicTensor(trace_ctx, inputs[inputIndex++], status); + if (!status->status.ok()) return nullptr; + TF_AddInput(desc, symbolic_input); + continue; + } + const std::string& type_list_attr = input_arg.type_list_attr(); + const auto& attr_value = attrs[type_list_attr]; + DCHECK(attr_value.value_case() == tensorflow::AttrValue::kList) + << "Type list attribute should be a list!"; + std::vector list_inputs(attr_value.list().type_size()); + for (TF_Output& list_input : list_inputs) { + list_input = + getOrCreateSymbolicTensor(trace_ctx, inputs[inputIndex++], status); + if (!status->status.ok()) return nullptr; + } + TF_AddInputList(desc, list_inputs.data(), list_inputs.size()); + } + auto* graph_op = TF_FinishOperation(desc, status); if (!status->status.ok()) return nullptr; diff --git a/tensorflow/c/c_api_experimental_test.cc b/tensorflow/c/c_api_experimental_test.cc index 354ee5f49f..c54021a751 100644 --- a/tensorflow/c/c_api_experimental_test.cc +++ b/tensorflow/c/c_api_experimental_test.cc @@ -446,5 +446,29 @@ TEST_F(AddEagerOpToGraphTest, ListAttributesArePreserved) { TFE_DeleteOp(squeeze); } +TEST_F(AddEagerOpToGraphTest, ListInputsAreAddedCorrectly) { + TFE_TensorHandle* scalar = TestScalarTensorHandle(); + TFE_Op* identityn = TFE_NewOp(eager_ctx_, "IdentityN", status_); + CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_); + constexpr size_t kNumInputs = 3; + for (size_t i = 0; i < kNumInputs; ++i) { + TFE_OpAddInput(identityn, scalar, status_); + } + TF_DataType types[kNumInputs] = {TF_FLOAT, TF_FLOAT, TF_FLOAT}; + TFE_OpSetAttrTypeList(identityn, "T", types, kNumInputs); + AddEagerOpToGraphAndCheck( + identityn, [this, kNumInputs](TF_Operation* graph_op) { + EXPECT_EQ(TF_OperationNumInputs(graph_op), kNumInputs); + EXPECT_EQ(TF_OperationInputListLength(graph_op, "input", 
status_), + kNumInputs); + CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_); + EXPECT_EQ(TF_OperationOutputListLength(graph_op, "output", status_), + kNumInputs); + CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_); + }); + TFE_DeleteTensorHandle(scalar); + TFE_DeleteOp(identityn); +} + } // namespace } // namespace tensorflow -- GitLab From 72a0e0761513f35f892298cc27687e968462b452 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 15 Feb 2019 10:33:52 -0800 Subject: [PATCH 212/351] Add fast path for `OpRegistry::LookUp()` that avoids acquiring an exclusive lock. The registry is read-mostly, and writes are rare (startup + any calls to `tf.load_op_library()`), so in the common case it is sufficient to acquire a shared lock for a successful lookup. This change reduces the overhead of `MapDatasetOp::MakeDataset()` inside a parallel interleave, and any other case where multiple threads might concurrently attempt to instantiate a function or build a subgraph. PiperOrigin-RevId: 234169932 --- tensorflow/core/framework/op.cc | 15 +++++++++++++++ tensorflow/core/framework/op.h | 3 +++ 2 files changed, 18 insertions(+) diff --git a/tensorflow/core/framework/op.cc b/tensorflow/core/framework/op.cc index b8309eafb0..b29d7ae77f 100644 --- a/tensorflow/core/framework/op.cc +++ b/tensorflow/core/framework/op.cc @@ -60,6 +60,21 @@ void OpRegistry::Register(const OpRegistrationDataFactory& op_data_factory) { Status OpRegistry::LookUp(const string& op_type_name, const OpRegistrationData** op_reg_data) const { + { + tf_shared_lock l(mu_); + if (initialized_) { + if (const OpRegistrationData* res = + gtl::FindWithDefault(registry_, op_type_name, nullptr)) { + *op_reg_data = res; + return Status::OK(); + } + } + } + return LookUpSlow(op_type_name, op_reg_data); +} + +Status OpRegistry::LookUpSlow(const string& op_type_name, + const OpRegistrationData** op_reg_data) const { *op_reg_data = nullptr; const OpRegistrationData* res = nullptr; diff --git
a/tensorflow/core/framework/op.h b/tensorflow/core/framework/op.h index 81ed5f95f0..538ce04ef4 100644 --- a/tensorflow/core/framework/op.h +++ b/tensorflow/core/framework/op.h @@ -144,6 +144,9 @@ class OpRegistry : public OpRegistryInterface { Status RegisterAlreadyLocked(const OpRegistrationDataFactory& op_data_factory) const EXCLUSIVE_LOCKS_REQUIRED(mu_); + Status LookUpSlow(const string& op_type_name, + const OpRegistrationData** op_reg_data) const; + mutable mutex mu_; // Functions in deferred_ may only be called with mu_ held. mutable std::vector deferred_ GUARDED_BY(mu_); -- GitLab From 81ae50e5c17d5739f1cd615b4dcbbeb91bbaf638 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Fri, 15 Feb 2019 10:34:03 -0800 Subject: [PATCH 213/351] [XLA] Fix a bug in the HLO evaluator implementation of the Clamp operation. Previously, we incorrectly converted integer values to floating point values, then computed the clamped result and converted the result back to the desired type. This can lead to incorrect results when the value can't be represented by the floating point type precisely. The fix is to avoid such type conversion. Add a test case.
PiperOrigin-RevId: 234169980 --- .../xla/service/hlo_evaluator_test.cc | 32 +++++++++++++++++++ .../xla/service/hlo_evaluator_typed_visitor.h | 2 +- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index fb8cd299ce..b2eb5874e6 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -111,6 +111,24 @@ class HloEvaluatorTest : public HloTestBase { EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); } + void TestTernaryOp(HloOpcode opcode, Literal expected, Literal src0, + Literal src1, Literal src2) { + HloComputation::Builder b(TestName()); + auto operand0 = + b.AddInstruction(HloInstruction::CreateConstant(std::move(src0))); + auto operand1 = + b.AddInstruction(HloInstruction::CreateConstant(std::move(src1))); + auto operand2 = + b.AddInstruction(HloInstruction::CreateConstant(std::move(src2))); + b.AddInstruction(HloInstruction::CreateTernary( + expected.shape(), opcode, operand0, operand1, operand2)); + m_->AddEntryComputation(b.Build()); + + Literal result = Evaluate(); + + EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); + } + protected: explicit HloEvaluatorTest(bool use_bfloat16) : use_bfloat16_(use_bfloat16) {} HloEvaluator evaluator_; @@ -254,6 +272,20 @@ TEST_F(HloEvaluatorTest, DoesDivideInt64) { TestBinaryOp(HloOpcode::kDivide, std::move(expected), std::move(lhs), std::move(rhs)); } + +TEST_F(HloEvaluatorTest, DoesClampS64) { + auto low = LiteralUtil::CreateR1( + {-8616761059752331528LL, 6780561065411491190LL, -8616761059752331528LL}); + auto value = LiteralUtil::CreateR1( + {-6780561065411491190LL, 6780561065411491180LL, 4241131823772864090LL}); + auto high = LiteralUtil::CreateR1( + {-6780561065411491180LL, 8616761059752331528LL, 3832151243857508051LL}); + auto expected = LiteralUtil::CreateR1( + {-6780561065411491190LL, 6780561065411491190LL, 
3832151243857508051LL}); + TestTernaryOp(HloOpcode::kClamp, std::move(expected), std::move(low), + std::move(value), std::move(high)); +} + TEST_P(HloEvaluatorBf16Test, DoesDivideDouble) { auto lhs = LiteralUtil::CreateR2({{1.0, 0.0}, {-100.0, 4.0}}); auto rhs = LiteralUtil::CreateR2({{2.2, 4.0}, {4.0, 4.0}}); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index e0a0fc4acb..c6b8d28982 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -903,7 +903,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return static_cast(NAN); } return static_cast( - std::fmin(high, std::fmax(value, low))); + std::min(high, std::max(value, low))); }; TF_ASSIGN_OR_RETURN( parent_->evaluated_[clamp], -- GitLab From 15f97c58e62336cf5fcf92561a7c7272726340e5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 10:37:15 -0800 Subject: [PATCH 214/351] Allow tiles and element size to be ignored in layout comparison. 
PiperOrigin-RevId: 234170691 --- tensorflow/compiler/xla/layout.cc | 22 ++++++++++--- tensorflow/compiler/xla/layout.h | 45 ++++++++++++++++++++++---- tensorflow/compiler/xla/layout_test.cc | 9 ++++++ tensorflow/compiler/xla/shape.cc | 9 +++++- tensorflow/compiler/xla/shape.h | 16 +++++++-- 5 files changed, 86 insertions(+), 15 deletions(-) diff --git a/tensorflow/compiler/xla/layout.cc b/tensorflow/compiler/xla/layout.cc index 91641c244b..000c4fdc40 100644 --- a/tensorflow/compiler/xla/layout.cc +++ b/tensorflow/compiler/xla/layout.cc @@ -95,12 +95,24 @@ string Layout::ToString() const { } } +bool Layout::Equal::operator()(const Layout& lhs, const Layout& rhs) { + if (lhs.format() != rhs.format() || + lhs.minor_to_major() != rhs.minor_to_major() || + lhs.max_sparse_elements() != rhs.max_sparse_elements()) { + return false; + } + if (!ignore_tiles_ && lhs.tiles() != rhs.tiles()) { + return false; + } + if (!ignore_element_size_ && + lhs.element_size_in_bits() != rhs.element_size_in_bits()) { + return false; + } + return true; +} + bool Layout::operator==(const Layout& other) const { - return (other.format() == format() && - other.minor_to_major() == minor_to_major() && - other.element_size_in_bits() == element_size_in_bits() && - other.max_sparse_elements() == max_sparse_elements() && - other.tiles() == tiles()); + return Equal()(*this, other); } std::ostream& operator<<(std::ostream& out, const Tile& tile) { diff --git a/tensorflow/compiler/xla/layout.h b/tensorflow/compiler/xla/layout.h index 1faa162998..acc449b781 100644 --- a/tensorflow/compiler/xla/layout.h +++ b/tensorflow/compiler/xla/layout.h @@ -85,10 +85,12 @@ class Layout { // Constructs a dense tiled layout with the given minor-to-major order and // tiles. 
- Layout(absl::Span minor_to_major, absl::Span tiles) + Layout(absl::Span minor_to_major, absl::Span tiles, + int64 element_size_in_bits = 0) : format_(DENSE), minor_to_major_(minor_to_major.begin(), minor_to_major.end()), - tiles_(tiles.begin(), tiles.end()) {} + tiles_(tiles.begin(), tiles.end()), + element_size_in_bits_(element_size_in_bits) {} // Construct a shape from a LayoutProto. static Layout CreateFromProto(const LayoutProto& proto); @@ -99,6 +101,37 @@ class Layout { // Returns a human-readable string that represents this layout. string ToString() const; + // Equal is a configurable functor to check the equality of two layouts. + // + // Examples: + // + // - Comparing two layouts ignoring their difference in tiles: + // Equal().IgnoreTiles()(layout1, layout2); + // + // - Comparing two layouts ignoring their difference in tiles and element + // size: + // Equal().IgnoreTiles().IgnoreElementSize()(layout1, layout2); + class Equal { + public: + Equal() = default; + + bool operator()(const Layout& lhs, const Layout& rhs); + + Equal& IgnoreTiles() { + ignore_tiles_ = true; + return *this; + } + + Equal& IgnoreElementSize() { + ignore_element_size_ = true; + return *this; + } + + private: + bool ignore_tiles_ = false; + bool ignore_element_size_ = false; + }; + bool operator==(const Layout& other) const; bool operator!=(const Layout& other) const { return !(*this == other); } @@ -173,7 +206,7 @@ class Layout { element_size_in_bits_ = 0; } - public: + private: // The format of this layout. Format format_ = INVALID_FORMAT; @@ -186,11 +219,11 @@ class Layout { // memory. This field must be zero unless the format is SPARSE. int64 max_sparse_elements_ = 0; - // The number of bits used to store an individual array element. - int64 element_size_in_bits_ = 0; - // The tiles used in tiling-based layout. std::vector tiles_; + + // The number of bits used to store an individual array element. 
+ int64 element_size_in_bits_ = 0; }; std::ostream& operator<<(std::ostream& out, const Tile& Tile); diff --git a/tensorflow/compiler/xla/layout_test.cc b/tensorflow/compiler/xla/layout_test.cc index e6465966d4..f5d71c553e 100644 --- a/tensorflow/compiler/xla/layout_test.cc +++ b/tensorflow/compiler/xla/layout_test.cc @@ -87,6 +87,15 @@ TEST_F(LayoutTest, Equality) { Layout().set_format(SPARSE).set_max_sparse_elements(42)); EXPECT_NE(Layout().set_format(SPARSE).set_max_sparse_elements(42), Layout().set_format(SPARSE).set_max_sparse_elements(24)); + + EXPECT_FALSE( + Layout::Equal()(Layout({0, 1, 2}, {Tile({42, 44})}), Layout({0, 1, 2}))); + EXPECT_TRUE(Layout::Equal().IgnoreTiles()(Layout({0, 1, 2}, {Tile({42, 44})}), + Layout({0, 1, 2}))); + EXPECT_FALSE( + Layout::Equal()(Layout({0, 1, 2}, {}, 32), Layout({0, 1, 2}, {}, 1))); + EXPECT_TRUE(Layout::Equal().IgnoreElementSize()(Layout({0, 1, 2}, {}, 32), + Layout({0, 1, 2}, {}, 1))); } TEST_F(LayoutTest, LayoutToFromProto) { diff --git a/tensorflow/compiler/xla/shape.cc b/tensorflow/compiler/xla/shape.cc index 93d630b8f7..94854047e5 100644 --- a/tensorflow/compiler/xla/shape.cc +++ b/tensorflow/compiler/xla/shape.cc @@ -147,7 +147,14 @@ bool Shape::Equal::operator()(const Shape& lhs, const Shape& rhs) { return false; } if (LayoutUtil::IsDenseArray(lhs)) { - if (lhs.layout() != rhs.layout()) { + Layout::Equal equal; + if (ignore_tiles_in_layout_) { + equal.IgnoreTiles(); + } + if (ignore_element_size_in_layout_) { + equal.IgnoreElementSize(); + } + if (!equal(lhs.layout(), rhs.layout())) { VLOG(3) << "CompareShapes: lhs layout != rhs layout"; return false; } diff --git a/tensorflow/compiler/xla/shape.h b/tensorflow/compiler/xla/shape.h index 1d594904e0..78cea83c6d 100644 --- a/tensorflow/compiler/xla/shape.h +++ b/tensorflow/compiler/xla/shape.h @@ -146,10 +146,10 @@ class Shape { // // Examples: // - // - Comparing two shapes ignoring they layout difference: + // - Comparing two shapes ignoring their layout 
difference: // Equal().IgnoreLayout()(shape1, shape2); // - // - Comparing two shapes ignoring they layout and element type difference: + // - Comparing two shapes ignoring their layout and element type difference: // Equal().IgnoreLayout().IgnoreElementType()(shape1, shape2); class Equal { public: @@ -161,6 +161,14 @@ class Shape { ignore_layout_ = true; return *this; } + Equal& IgnoreTilesInLayout() { + ignore_tiles_in_layout_ = true; + return *this; + } + Equal& IgnoreElementSizeInLayout() { + ignore_element_size_in_layout_ = true; + return *this; + } Equal& IgnoreElementType() { ignore_element_type_ = true; return *this; @@ -174,8 +182,10 @@ class Shape { return *this; } - public: + private: bool ignore_layout_ = false; + bool ignore_tiles_in_layout_ = false; + bool ignore_element_size_in_layout_ = false; bool ignore_element_type_ = false; bool ignore_fp_precision_ = false; bool ignore_dynamic_dimension_ = false; -- GitLab From 2cf06739ee3e5a8eaed56d986f3abc30431323a7 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Fri, 15 Feb 2019 10:41:04 -0800 Subject: [PATCH 215/351] [XLA:Python] Add methods to Computation that render computations as HLO text or as HLO graphs. 
PiperOrigin-RevId: 234171494 --- tensorflow/compiler/xla/python/BUILD | 3 ++- .../xla/python/local_computation_builder.cc | 27 +++++++++++++++++++ .../xla/python/local_computation_builder.h | 6 +++++ .../xla/python/local_computation_builder.i | 11 ++++++++ tensorflow/compiler/xla/python/xla_client.py | 16 +++++++++++ .../compiler/xla/python/xla_client_test.py | 20 ++++++++++++++ .../compiler/xla/service/hlo_graph_dumper.cc | 11 ++++++++ .../compiler/xla/service/hlo_graph_dumper.h | 13 ++++++++- 8 files changed, 105 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD index a0687e0d52..d09bdc6460 100644 --- a/tensorflow/compiler/xla/python/BUILD +++ b/tensorflow/compiler/xla/python/BUILD @@ -78,6 +78,7 @@ cc_library( "//tensorflow/compiler/xla/client/lib:math", "//tensorflow/compiler/xla/client/lib:qr", "//tensorflow/compiler/xla/service:computation_placer", + "//tensorflow/compiler/xla/service:hlo_graph_dumper", "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xla/service:shaped_buffer", "//tensorflow/compiler/xla/service/cpu:custom_call_target_registry", @@ -85,7 +86,7 @@ cc_library( "//tensorflow/compiler/xrt/cc:xrt_ops", "//tensorflow/core:framework", "//tensorflow/core:lib", - "//third_party/python_runtime:headers", + "//third_party/python_runtime:headers", # buildcleaner: keep "@com_google_absl//absl/memory", "@com_google_absl//absl/types:span", ], diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index 671953aefe..76ec3525e9 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -34,6 +34,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/computation_placer.h" #include "tensorflow/compiler/xla/service/cpu/custom_call_target_registry.h" +#include "tensorflow/compiler/xla/service/hlo_graph_dumper.h" #include "tensorflow/compiler/xla/service/platform_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/util.h" @@ -582,6 +583,32 @@ string Computation::GetSerializedProto() const { return result; } +StatusOr Computation::GetHloText() const { + TF_ASSIGN_OR_RETURN(const HloModuleConfig module_config, + HloModule::CreateModuleConfigFromProto( + computation_.proto(), GetDebugOptionsFromFlags())); + TF_ASSIGN_OR_RETURN( + std::unique_ptr hlo_module, + HloModule::CreateFromProto(computation_.proto(), module_config)); + HloPrintOptions options; + options = HloPrintOptions::ShortParsable(); + options.set_print_large_constants(false); + return hlo_module->ToString(options); +} + +StatusOr Computation::GetHloDotGraph() const { + TF_ASSIGN_OR_RETURN(const HloModuleConfig module_config, + HloModule::CreateModuleConfigFromProto( + computation_.proto(), GetDebugOptionsFromFlags())); + TF_ASSIGN_OR_RETURN( + std::unique_ptr hlo_module, + HloModule::CreateFromProto(computation_.proto(), module_config)); + hlo_graph_dumper::DotGraphOptions options; + options.debug_options = &hlo_module->config().debug_options(); + return hlo_graph_dumper::HloComputationToDotGraph( + *hlo_module->entry_computation(), options); +} + StatusOr Computation::GetProgramShape() const { return computation_.GetProgramShape(); } diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h index 9ff46d57dc..f25df25aa5 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.h +++ b/tensorflow/compiler/xla/python/local_computation_builder.h @@ -252,6 +252,12 @@ class Computation { // string on failure. 
string GetSerializedProto() const; + // Returns the computation in human-readable HLO text format. + StatusOr GetHloText() const; + + // Returns the computation in graphviz dot format. + StatusOr GetHloDotGraph() const; + // Returns the program shape for this computation. StatusOr GetProgramShape() const; diff --git a/tensorflow/compiler/xla/python/local_computation_builder.i b/tensorflow/compiler/xla/python/local_computation_builder.i index 5327ce91db..de1e7d592b 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.i +++ b/tensorflow/compiler/xla/python/local_computation_builder.i @@ -234,6 +234,15 @@ tensorflow::ImportNumpy(); } } +%typemap(out) StatusOr { + if ($1.ok()) { + $result = PyString_FromString($1.ConsumeValueOrDie().c_str()); + } else { + PyErr_SetString(PyExc_RuntimeError, $1.status().ToString().c_str()); + SWIG_fail; + } +} + %typemap(out) Status { if (!$1.ok()) { PyErr_SetString( @@ -1036,6 +1045,8 @@ tensorflow::ImportNumpy(); %unignore xla::swig::Computation::GetProgramShape; %unignore xla::swig::Computation::GetReturnValueShape; %unignore xla::swig::Computation::GetSerializedProto; +%unignore xla::swig::Computation::GetHloText; +%unignore xla::swig::Computation::GetHloDotGraph; %unignore xla::swig::LocalOp; %unignore xla::swig::ComputationBuilder; %unignore xla::swig::ComputationBuilder::ComputationBuilder; diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 020cc587fe..e5668eed79 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -679,6 +679,22 @@ class Computation(object): proto = hlo_pb2.HloModuleProto.FromString(serialized) return proto + def GetHloText(self): + """Get the textual HLO representation of this computation. + + Returns: + A string containing the textual HLO. + """ + return self.computation.GetHloText() + + def GetHloDotGraph(self): + """Get a Graphviz Dot representation of this computation. 
+ + Returns: + A string containing the graphviz dot graph. + """ + return self.computation.GetHloDotGraph() + def Compile(self, argument_shapes=(), compile_options=None, layout_fn=None, backend=None): """Compiles a computation. diff --git a/tensorflow/compiler/xla/python/xla_client_test.py b/tensorflow/compiler/xla/python/xla_client_test.py index f830cb26e3..45ed209c99 100644 --- a/tensorflow/compiler/xla/python/xla_client_test.py +++ b/tensorflow/compiler/xla/python/xla_client_test.py @@ -85,6 +85,26 @@ def NumpyArrayBool(*args, **kwargs): return np.array(*args, dtype=np.bool, **kwargs) +class ComputationPrinting(unittest.TestCase): + + def ExampleComputation(self): + builder = xla_client.ComputationBuilder("acomputation") + p0 = builder.ParameterFromNumpy(np.float32(0)) + p1 = builder.ParameterFromNumpy(np.zeros((4,), np.float32)) + builder.Mul(p0, p1) + return builder.Build() + + def testComputationToHloText(self): + computation = self.ExampleComputation() + hlo_text = computation.GetHloText() + self.assertTrue(hlo_text.startswith("HloModule acomputation")) + + def testComputationToHloGraph(self): + computation = self.ExampleComputation() + hlo_dot_graph = computation.GetHloDotGraph() + self.assertTrue(hlo_dot_graph.startswith("digraph ")) + + class ComputationsWithConstantsTest(ComputationTest): """Tests focusing on Constant ops.""" diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index e6f446c926..4e21c33fec 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -1491,6 +1491,17 @@ string ExportGraph(const string& graph, } // namespace +string HloComputationToDotGraph(const HloComputation& computation, + const DotGraphOptions& options) { + DebugOptions default_debug_options; + return HloDotDumper(&computation, options.label, + options.debug_options ? 
*options.debug_options + : default_debug_options, + options.show_backend_config, options.profile, + NodeFilter()) + .Dump(); +} + string DumpGraph(const HloComputation& computation, const string& label, const DebugOptions& debug_options, const HloExecutionProfile* hlo_execution_profile, diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.h b/tensorflow/compiler/xla/service/hlo_graph_dumper.h index b5444a32b1..17bbc6abf3 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.h +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.h @@ -26,8 +26,19 @@ limitations under the License. namespace xla { namespace hlo_graph_dumper { +// Converts a HLO module to a DOT (graphviz) graph. Returns the dot graph as +// a string. +struct DotGraphOptions { + absl::string_view label; + const DebugOptions* debug_options = nullptr; + const HloExecutionProfile* profile = nullptr; + bool show_backend_config = false; +}; +string HloComputationToDotGraph(const HloComputation& computation, + const DotGraphOptions& options); + // Abstract interface for classes that render HLO graphs (e.g. DOT graph, -// tensorflow GraphDef). +// tensorflow GraphDef) to files or services. class GraphRendererInterface { public: enum GraphKind { -- GitLab From b1e58acc43b75ab7469f62af12d8aa537bc5185f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 10:43:41 -0800 Subject: [PATCH 216/351] Deflake rnn_cell_test: test floats are close rather than exactly equal. PiperOrigin-RevId: 234172025 --- tensorflow/python/kernel_tests/rnn_cell_test.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/kernel_tests/rnn_cell_test.py b/tensorflow/python/kernel_tests/rnn_cell_test.py index b8dfbfc942..07eb984491 100644 --- a/tensorflow/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/python/kernel_tests/rnn_cell_test.py @@ -1428,13 +1428,8 @@ class BidirectionalRNNTest(test.TestCase): # Both sequences in batch are length=8. 
Check that the time=i # forward output is equal to time=8-1-i backward output for i in range(8): - self.assertEqual(out[i][0][0], out[8 - 1 - i][0][3]) - self.assertEqual(out[i][0][1], out[8 - 1 - i][0][4]) - self.assertEqual(out[i][0][2], out[8 - 1 - i][0][5]) - for i in range(8): - self.assertEqual(out[i][1][0], out[8 - 1 - i][1][3]) - self.assertEqual(out[i][1][1], out[8 - 1 - i][1][4]) - self.assertEqual(out[i][1][2], out[8 - 1 - i][1][5]) + self.assertAllClose(out[i][0][0:3], out[8 - 1 - i][0][3:6]) + self.assertAllClose(out[i][1][0:3], out[8 - 1 - i][1][3:6]) # Via the reasoning above, the forward and backward final state should be # exactly the same self.assertAllClose(s_fw, s_bw) -- GitLab From 512b39a494749be6bedd6a05ca49e7acd4ee4dd0 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 15 Feb 2019 10:51:31 -0800 Subject: [PATCH 217/351] Remove indirect dependency to clustering_ops_op_lib from kernels. PiperOrigin-RevId: 234173753 --- tensorflow/core/kernels/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index e9033f3ffd..fd7c2745f7 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -160,7 +160,6 @@ tf_kernel_library( name = "clustering_ops", prefix = "clustering_ops", deps = [ - "//tensorflow/core:clustering_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:framework_headers_lib", "//tensorflow/core:lib", -- GitLab From ef8173a93749f645807428cdb33f097edd39be33 Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Fri, 15 Feb 2019 10:58:54 -0800 Subject: [PATCH 218/351] Add a harder test for return statements. 
PiperOrigin-RevId: 234175275 --- .../converters/return_statements_test.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tensorflow/python/autograph/converters/return_statements_test.py b/tensorflow/python/autograph/converters/return_statements_test.py index 2942555d17..b2d3d1b920 100644 --- a/tensorflow/python/autograph/converters/return_statements_test.py +++ b/tensorflow/python/autograph/converters/return_statements_test.py @@ -197,6 +197,25 @@ class SingleReturnTest(converter_testing.TestCase): self.assertTransformedEquivalent(test_fn, 4) self.assertTransformedEquivalent(test_fn, 5) + def test_nested_multiple_withs(self): + + def test_fn(x): + v = [] + while x > 0: + x -= 1 + with ops.name_scope(''): + if x % 2 == 0: + return v + with ops.name_scope(''): + v.append(x) + v.append(x) + return v + + self.assertTransformedEquivalent(test_fn, 0) + self.assertTransformedEquivalent(test_fn, 1) + self.assertTransformedEquivalent(test_fn, 3) + self.assertTransformedEquivalent(test_fn, 4) + if __name__ == '__main__': test.main() -- GitLab From 0a2cde99979e556e838800623607d254d45312ad Mon Sep 17 00:00:00 2001 From: James Ring Date: Fri, 15 Feb 2019 11:10:31 -0800 Subject: [PATCH 219/351] Use protobuf lite in C API This change allows Android targets to depend on C API. 
PiperOrigin-RevId: 234177903 --- tensorflow/c/c_api.cc | 2 +- tensorflow/c/c_api_function.cc | 7 +++++++ tensorflow/c/c_api_internal.h | 3 ++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index ef22b67fe9..245d7ba2b1 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -641,7 +641,7 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, dimvec.size(), base, size, DeleteArray, base); } -Status MessageToBuffer(const tensorflow::protobuf::Message& in, +Status MessageToBuffer(const tensorflow::protobuf::MessageLite& in, TF_Buffer* out) { if (out->data != nullptr) { return InvalidArgument("Passing non-empty TF_Buffer is invalid."); diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc index 45d6c33a1e..6ec6bed8aa 100644 --- a/tensorflow/c/c_api_function.cc +++ b/tensorflow/c/c_api_function.cc @@ -272,10 +272,17 @@ Status FillFunctionBody( } } if (!node_attr_def) { +#ifdef TENSORFLOW_LITE_PROTOS + return errors::Unimplemented( + "Placeholder value is not supported for attributes not in OpDef. " + "Attribute: ", + node_attr_name); +#else return errors::Unimplemented( "Placeholder value is not supported for attributes not in OpDef. " "Attribute: ", node_attr_name, ", OpDef: ", node->op_def().DebugString()); +#endif } OpDef::AttrDef* attr_def = fdef->mutable_signature()->add_attr(); attr_def->set_name(func_attr_name); diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h index 73283d7756..d520b6b768 100644 --- a/tensorflow/c/c_api_internal.h +++ b/tensorflow/c/c_api_internal.h @@ -204,7 +204,8 @@ Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst); TF_Tensor* TF_TensorFromTensor(const Tensor& src, TF_Status* status); -Status MessageToBuffer(const tensorflow::protobuf::Message& in, TF_Buffer* out); +Status MessageToBuffer(const tensorflow::protobuf::MessageLite& in, + TF_Buffer* out); // Set the shapes and types of the output's handle. 
// -- GitLab From dfd092588b2599b66d5350591d72fb44a13ca584 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 11:15:05 -0800 Subject: [PATCH 220/351] Register gradient for TPUEmbeddingActivation in one place. Factor dummy variable out of tpu_embedding.py. PiperOrigin-RevId: 234178740 --- tensorflow/contrib/tpu/BUILD | 5 +- tensorflow/contrib/tpu/python/ops/tpu_ops.py | 32 +++- .../python/tpu/_tpu_estimator_embedding.py | 1 + .../contrib/tpu/python/tpu/tpu_embedding.py | 119 +++----------- .../tpu/python/tpu/tpu_embedding_gradient.py | 153 ++++++++++++++++++ .../contrib/tpu/python/tpu/tpu_estimator.py | 33 +++- 6 files changed, 235 insertions(+), 108 deletions(-) create mode 100644 tensorflow/contrib/tpu/python/tpu/tpu_embedding_gradient.py diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index d580ca6eb6..6a55cd0f29 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -334,7 +334,10 @@ tf_py_test( py_library( name = "tpu_embedding", - srcs = ["python/tpu/tpu_embedding.py"], + srcs = [ + "python/tpu/tpu_embedding.py", + "python/tpu/tpu_embedding_gradient.py", + ], srcs_version = "PY2AND3", deps = [ ":tpu_lib", diff --git a/tensorflow/contrib/tpu/python/ops/tpu_ops.py b/tensorflow/contrib/tpu/python/ops/tpu_ops.py index 2320306ba9..ec0d5fec44 100644 --- a/tensorflow/contrib/tpu/python/ops/tpu_ops.py +++ b/tensorflow/contrib/tpu/python/ops/tpu_ops.py @@ -24,6 +24,7 @@ import platform from tensorflow.contrib.tpu.python.tpu import tpu_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging if platform.system() != "Windows": @@ -154,6 +155,36 @@ if platform.system() != "Windows": dtypes.complex64, dtypes.uint32 ]) + @ops.RegisterGradient("TPUEmbeddingActivations") + def _embedding_activations_grad(activations_op, grad_wrt_activations): + """Saves 
the gradient of embedding activations ops in a graph collection.""" + g = ops.get_default_graph() + table_id = activations_op.get_attr("table_id") + lookup_id = activations_op.get_attr("lookup_id") + table_gradients = g.get_collection_ref( + "tpu_embedding_gradients_table_%d" % table_id) + + if not table_gradients: + raise RuntimeError( + "Gradients for TPUEmbedding have been generated in non-training mode." + "This is not expected. Consider putting your Optimizer.minimize code " + "behind the training mode condition check. For Estimator, you can " + "do \n\n" + " if mode == tf.estimator.ModeKeys.TRAIN:\n" + " train_op = opt.minimize(loss)\n" + "\n") + + table_gradients[lookup_id] = array_ops.identity(grad_wrt_activations) + return [ + # RegisterGradient requires that value be returned for all inputs. Since + # the first argument (tpu_gradient_variable_{table_name}) has shape [1], + # we will return zeros(shape=[1]). The actual gradient w.r.t. the + # embedding activations (grad_wrt_activations) has the same shape as the + # activations returned by embedding_activations. + array_ops.zeros(arg.shape, dtype=dtypes.float32) + for arg in activations_op.inputs + ] + def infeed_dequeue(dtype, shape, name=None): """A placeholder op for a value that will be fed into the computation. 
@@ -234,7 +265,6 @@ if platform.system() != "Windows": return gen_tpu_ops.send_tpu_embedding_gradients( inputs=inputs, learning_rates=learning_rates, config=config, name=name) - send_tpu_embedding_gradients.__doc__ = ( gen_tpu_ops.send_tpu_embedding_gradients.__doc__) diff --git a/tensorflow/contrib/tpu/python/tpu/_tpu_estimator_embedding.py b/tensorflow/contrib/tpu/python/tpu/_tpu_estimator_embedding.py index dd239d5d78..98aa7827fc 100644 --- a/tensorflow/contrib/tpu/python/tpu/_tpu_estimator_embedding.py +++ b/tensorflow/contrib/tpu/python/tpu/_tpu_estimator_embedding.py @@ -286,6 +286,7 @@ class EmbeddingConfig(object): self._optimization_parameters = _get_tpu_embedding_optimization_parameters( self._embedding_config_spec) self._mode_to_tpu_embedding_dict = {} + self.dummy_table_variables = None def has_embedding_tables(self): return bool(self._table_to_config_dict) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py index fcad7b2972..1ba8017cda 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py @@ -34,13 +34,11 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import gen_tpu_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables TRAINING = elc.TPUEmbeddingConfiguration.TRAINING INFERENCE = elc.TPUEmbeddingConfiguration.INFERENCE @@ -268,10 +266,11 @@ class TPUEmbedding(object): base_optimizer) train_op = cross_shard_optimizer.minimize(loss) - # `train_op` and `send_gradients_op` must happen in order. 
- with ops.control_dependencies([train_op]): - send_gradients_op = embedding.generate_send_gradients_op() - with ops.control_dependencies([send_gradients_op]): + gradients = ( + tpu_embedding_gradient.get_gradients_through_compute_gradients( + cross_shard_optimizer, loss, activations) + send_gradients_op = embedding.generate_send_gradients_op(gradients) + with ops.control_dependencies([train_op, send_gradients_op]): loss = array_ops.identity(loss) loss = tpu.shard(computation, @@ -281,7 +280,6 @@ class TPUEmbedding(object): sess.run(tpu.initialize_system(embedding_config= embedding.config_proto)) sess.run(variables.global_variables_initializer()) - sess.run(embedding.init_ops) sess.run(embedding_variables_and_ops.load_ops()) sess.run(enqueue_ops) loss_val = sess.run(loss) @@ -360,8 +358,6 @@ class TPUEmbedding(object): _validate_batch_size(self._batch_size, self._num_cores) self._batch_size_per_core = self._batch_size // self._num_cores - self._init_ops = [] - # TODO(shizhiw): remove `mode`? if mode == TRAINING: _validate_optimization_parameters(optimization_parameters) @@ -384,9 +380,6 @@ class TPUEmbedding(object): self._optimizer_handler = _get_optimization_handler( self._optimization_parameters) - dummy_table_variables_init_op = self._create_dummy_table_variables() - self._init_ops.append(dummy_table_variables_init_op) - self._config_proto = self._create_config_proto() @property @@ -441,19 +434,6 @@ class TPUEmbedding(object): """ return self._config_proto - @property - def init_ops(self): - """Initialization ops for TPU embedding. - - It must be called after all global variables have been initialized, - i.e. after `global_variables_initializer()`, as it loads embedding - tables into TPU. - - Returns: - A list of ops. 
- """ - return self._init_ops - @property def table_to_config_dict(self): return copy.copy(self._table_to_config_dict) @@ -462,6 +442,10 @@ class TPUEmbedding(object): def feature_to_table_dict(self): return copy.copy(self._feature_to_table_dict) + @property + def table_to_features_dict(self): + return copy.copy(self._table_to_features_dict) + @property def optimization_parameters(self): return self._optimization_parameters @@ -584,51 +568,6 @@ class TPUEmbedding(object): slot_variables_by_table, load_ops, retrieve_ops) - def _create_dummy_table_variables(self): - """Create dummy embedding table variables. - - The sole purpose of these dummy variables are to trigger gradient - calcuation wrt them so that the gradients wrt activation can be captured - and later sent to TPU embedding. - - Returns: - Initializer for these variables. - - Raises: - RuntimeError: if collection to store gradients already exists and is not - empty. - """ - self._dummy_table_variables = [] - # TODO(shizhiw): remove table id. - for table_id, table in enumerate(self._table_to_features_dict): - self._dummy_table_variables.append( - variable_scope.get_variable( - 'tpu_embedding_dummy_table_variable_%s' % table, - dtype=dtypes.float32, - shape=[1], - use_resource=True, - trainable=True, - # TODO(shizhiw): Remove these dummy variables as - # tensorflow optimizer creates slot variable for them which - # is undesirable. - # e.g. tpu_embedding_dummy_table_variable_mlp_user/Adam{_1}. - # Explicitly specifying collections prevents this variable from - # being added to the GLOBAL_VARIABLES collection, so that Saver() - # ignores it. - collections=['tpu_embedding_dummy_table_variables'])) - - g = ops.get_default_graph() - table_gradients = g.get_collection_ref( - 'tpu_embedding_gradients_table_%d' % table_id) - if table_gradients: - raise RuntimeError( - 'tpu_embedding_gradients_table_%d is not empty.' 
% table_id) - table_gradients.extend([None] * len(self._table_to_features_dict[table])) - - return variables.variables_initializer( - self._dummy_table_variables, - name='tpu_embedding_dummy_table_variables_init') - def generate_enqueue_ops(self, sparse_features_list): """Generate enqueue ops. @@ -775,52 +714,34 @@ class TPUEmbedding(object): for lookup_id, feature in enumerate(features): start_row = lookup_id * self._batch_size_per_core end_row = start_row + self._batch_size_per_core - activations[feature] = gen_tpu_ops.tpu_embedding_activations( - self._dummy_table_variables[table_id], - recv_activations[table_id][start_row:end_row, :], - table_id=table_id, - lookup_id=lookup_id) + activations[feature] = recv_activations[table_id][start_row:end_row, :] return activations - # TODO(shizhiw): Make `gradient_multiplier` per feature. Setting it to 0 would - # have the effect of `tf.stop_gradients()`. - # TODO(shizhiw): Consider alternative ways to capture gradients wrt embedding - # layer outputs to remove `_dummy_table_variables`, - # `_embedding_activation_grad` and `tpu_embedding_gradients_table_%d'. - def generate_send_gradients_op(self, gradient_multipliers=None): - """Retrieve gradients from collections and send them to TPU embedding. + def generate_send_gradients_op(self, feature_to_gradient_dict): + """Send gradient to TPU embedding. Args: - gradient_multipliers: None, or dict mapping table names to gradient - multiplier Tensors. + feature_to_gradient_dict: dict mapping feature names to gradient wrt + activations. Returns: SendTPUEmbeddingGradients Op. Raises: - ValueError: If required gradients have not been defined. RuntimeError: If `mode` is not `TRAINING`. """ if self._mode != TRAINING: raise RuntimeError('Only in training mode gradients need to ' 'be sent to TPU embedding; got mode {}.' 
.format(self._mode)) - - g = ops.get_default_graph() - gradients = list() - for table_id, table in enumerate(self._table_to_config_dict): - table_gradients = g.get_collection( - 'tpu_embedding_gradients_table_%d' % table_id) - if any(gradient is None for gradient in table_gradients): - raise ValueError( - 'Table {}/{} has undefined gradients: this is probably because the ' - 'model asked TPUEmbedding to compute activations that were not ' - 'used.'.format(table_id, table)) + gradients = [] + for table in self._table_to_features_dict: + features = self._table_to_features_dict[table] + table_gradients = [ + feature_to_gradient_dict[feature] for feature in features + ] concat_table_grads = array_ops.concat(table_gradients, axis=0) - if gradient_multipliers is not None: - concat_table_grads *= gradient_multipliers[table.name] gradients.append(concat_table_grads) - return tpu_ops.send_tpu_embedding_gradients( inputs=gradients, config=self.config_proto.SerializeToString()) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_embedding_gradient.py b/tensorflow/contrib/tpu/python/tpu/tpu_embedding_gradient.py new file mode 100644 index 0000000000..dace0d801b --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/tpu_embedding_gradient.py @@ -0,0 +1,153 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =================================================================== +"""Optional helper for gradient handling.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from tensorflow.contrib.tpu.python.ops import tpu_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables + + +def get_gradients_through_compute_gradients(optimizer, loss, activations): + """Compute gradients to send to TPU embedding. + + Args: + optimizer: a subclass of optimizer.Optimizer, usually CrossShardOptimizer. + Used to call compute_gradients(). + loss: a Tensor to call optimizer.compute_gradients() on. + activations: an OrderedDict mapping feature_name to Tensors of activations. + + Returns: + An OrderedDict mapping from feature name Strings to Tensors of gradients of + the loss wrt the activations of the features. + """ + activation_list = activations.values() + grads_and_vars = optimizer.compute_gradients(loss, activation_list) + grads = [grad for grad, _ in grads_and_vars] + feature_to_gradient_dict = collections.OrderedDict( + zip(activations.keys(), grads)) + return feature_to_gradient_dict + + +def create_dummy_table_variables(tpu_embedding): + """Create dummy embedding table variables. + + The sole purpose of these dummy variables are to trigger gradient + calcuation wrt them so that the gradients wrt activation can be captured + and later sent to TPU embedding. + + Args: + tpu_embedding: TPUEmbedding, dummy table variables will be created for use + with tpu_embedding. + + Returns: + A tuple of dummy variables and their initializer. + + Raises: + RuntimeError: if collection to store gradients already exists and is not + empty. 
+ """ + dummy_table_variables = collections.OrderedDict() + for table_id, table in enumerate(tpu_embedding.table_to_features_dict): + dummy_table_variables[table] = ( + # Explicitly specifying collections prevents this variable from + # being added to the GLOBAL_VARIABLES collection, so that Saver() + # ignores it. + # But Tensorflow optimizer creates slot variable for these dummy + # variable, e.g. tpu_embedding_dummy_table_variable_mlp_user/Adam{_1}, + # which will be in GLOBAL_VARIABLES collection, + variable_scope.get_variable( + 'tpu_embedding_dummy_table_variable_{}'.format(table), + dtype=dtypes.float32, + shape=[1], + use_resource=True, + trainable=True, + collections=['tpu_embedding_dummy_table_variables'])) + + g = ops.get_default_graph() + table_gradients = g.get_collection_ref( + 'tpu_embedding_gradients_table_{}'.format(table_id)) + if table_gradients: + raise RuntimeError( + 'tpu_embedding_gradients_table_{} is not empty.'.format(table_id)) + table_gradients.extend( + [None] * len(tpu_embedding.table_to_features_dict[table])) + + return (dummy_table_variables, + variables.variables_initializer( + dummy_table_variables.values(), + name='tpu_embedding_dummy_table_variables_init')) + + +def hook_dummy_table_variables_to_activations(tpu_embedding, activations, + dummy_table_variables): + """Have activations depend on dummy table variables for gradient intercept. + + Args: + tpu_embedding: TPUEmbedding, activations and dummy_table_variables are from + tpu_embedding. + activations: An OrderedDict of feature name String to activation tensors. + dummy_table_variables: An OrderedDict of table name String to dummy table + variables. + + Returns: + An OrderedDict of feature name String to activation tensors, which can be + used just as the activations input. 
+ """ + new_activations = collections.OrderedDict() + for feature in activations: + table = tpu_embedding.feature_to_table_dict[feature] + new_activations[feature] = tpu_ops.tpu_embedding_activations( + dummy_table_variables[table], + activations[feature], + table_id=tpu_embedding.table_to_config_dict.keys().index(table), + lookup_id=tpu_embedding.table_to_features_dict[table].index(feature)) + return new_activations + + +def get_gradients_through_dummy_table_variables(tpu_embedding): + """Get gradients wrt the activations of each feature. + + Args: + tpu_embedding: TPUEmbedding, create dummy table variable to be used with + tpu_embedding. + + Returns: + An OrderedDict mapping feature name to gradient. + + Raises: + ValueError: if some gradients are not defined. + """ + g = ops.get_default_graph() + feature_to_gradient_dict = collections.OrderedDict() + for table_id, table in enumerate(tpu_embedding.table_to_config_dict): + table_gradients = g.get_collection( + 'tpu_embedding_gradients_table_{}'.format(table_id)) + if any(gradient is None for gradient in table_gradients): + raise ValueError( + 'Table {} with id {} has undefined gradients: this is probably ' + 'because the model asked TPUEmbedding to compute activations that ' + 'were not used.'.format(table, table_id)) + for feature, gradient in zip(tpu_embedding.table_to_features_dict[table], + table_gradients): + feature_to_gradient_dict[feature] = gradient + return feature_to_gradient_dict diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 3066f0bcd8..cb2d369731 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -40,6 +40,7 @@ from tensorflow.contrib.tpu.python.tpu import tensor_tracer from tensorflow.contrib.tpu.python.tpu import tpu from tensorflow.contrib.tpu.python.tpu import tpu_config from tensorflow.contrib.tpu.python.tpu import tpu_context +from 
tensorflow.contrib.tpu.python.tpu import tpu_embedding_gradient from tensorflow.contrib.tpu.python.tpu import tpu_feed from tensorflow.contrib.tpu.python.tpu import training_loop from tensorflow.contrib.tpu.python.tpu import util as util_lib @@ -1395,11 +1396,19 @@ class _ModelFnWrapper(object): def call_without_tpu(self, features, labels, is_export_mode): return self._call_model_fn(features, labels, is_export_mode=is_export_mode) - def _add_embedding_features(self, features): + def _add_embedding_features(self, features, hook_dummy_table_variables): + """Add embedding features, optionally add hook to intercept gradient.""" if self._ctx.embedding_config: tpu_embedding_ = self._ctx.embedding_config.tpu_embedding embedding_activations = tpu_embedding_.get_activations() - features.update(embedding_activations) + if hook_dummy_table_variables: + new_embedding_activations = ( + tpu_embedding_gradient.hook_dummy_table_variables_to_activations( + tpu_embedding_, embedding_activations, + self._ctx.embedding_config.dummy_table_variables)) + features.update(new_embedding_activations) + else: + features.update(embedding_activations) def convert_to_single_tpu_train_step(self, dequeue_fn): """Converts user provided model_fn` as a single train step on TPU. @@ -1433,7 +1442,7 @@ class _ModelFnWrapper(object): del loss # unused; required in function signature. 
inputs = dequeue_fn() features, labels = inputs.features_and_labels() - self._add_embedding_features(features) + self._add_embedding_features(features, True) estimator_spec = self._verify_estimator_spec( self._call_model_fn(features, labels)) @@ -1450,7 +1459,13 @@ class _ModelFnWrapper(object): apply_sparse_grads = [] else: tpu_embedding_ = self._ctx.embedding_config.tpu_embedding - apply_sparse_grads = [tpu_embedding_.generate_send_gradients_op()] + gradients = ( + tpu_embedding_gradient.get_gradients_through_dummy_table_variables( + tpu_embedding_) + ) + apply_sparse_grads = [ + tpu_embedding_.generate_send_gradients_op(gradients) + ] # We must run train_op to update the variables prior to running the # outfeed. @@ -1500,7 +1515,7 @@ class _ModelFnWrapper(object): """Evaluation step function for use inside a while loop.""" inputs = dequeue_fn() features, labels = inputs.features_and_labels() - self._add_embedding_features(features) + self._add_embedding_features(features, False) tpu_estimator_spec = self._call_model_fn(features, labels) if not isinstance(tpu_estimator_spec, model_fn_lib._TPUEstimatorSpec): # pylint: disable=protected-access @@ -2774,8 +2789,12 @@ class TPUEstimator(estimator_lib.Estimator): input_fn = features tpu_init_ops = [] - if ctx.embedding_config: - tpu_init_ops.extend(ctx.embedding_config.tpu_embedding.init_ops) + if ctx.embedding_config and mode == model_fn_lib.ModeKeys.TRAIN: + dummy_table_variables, dummy_table_variables_init = ( + tpu_embedding_gradient.create_dummy_table_variables( + ctx.embedding_config.tpu_embedding)) + ctx.embedding_config.dummy_table_variables = dummy_table_variables + tpu_init_ops.append(dummy_table_variables_init) input_holders = _InputPipeline(input_fn, batch_axis, ctx) enqueue_ops, dequeue_fn, input_hooks, run_infeed_loop_on_coordinator = ( -- GitLab From dd2d989bb316f59a36e3e47973814255866ba6c4 Mon Sep 17 00:00:00 2001 From: William Chargin Date: Fri, 15 Feb 2019 11:16:39 -0800 Subject: [PATCH 221/351] Log 
Keras training and validation summaries to separate runs. Previously, metrics like accuracy would be logged as `epoch_acc` during training and `val_epoch_acc` during validation. As of this change, training metrics are logged to a `train/` subdirectory under the top-level logdir, while validation metrics are logged to `validation/`. This has the advantage that training and validation metrics can be shown in the same plot: ![Screenshot of new behavior](https://user-images.githubusercontent.com/4317806/52606214-9e3ddc80-2e26-11e9-9a02-2a5228edc8f6.png) Tested: Running a simple MNIST model generates a TensorBoard instance with summaries as described above, and with a graph under the "train" run. RELNOTES: Keras training and validation curves are shown on the same plot. PiperOrigin-RevId: 234179031 --- tensorflow/python/keras/callbacks.py | 78 +++++++-- tensorflow/python/keras/callbacks_test.py | 193 ++++++++++++++-------- 2 files changed, 186 insertions(+), 85 deletions(-) diff --git a/tensorflow/python/keras/callbacks.py b/tensorflow/python/keras/callbacks.py index 70119324ea..0111248225 100644 --- a/tensorflow/python/keras/callbacks.py +++ b/tensorflow/python/keras/callbacks.py @@ -1162,6 +1162,10 @@ class TensorBoard(Callback): self._total_batches_seen = 0 self._total_val_batches_seen = 0 + self._writers = [] # file writers to be closed + self._train_writer = None # set in `_initialize_writers` + self._validation_writer = None # set in `_initialize_writers` + def _validate_kwargs(self, kwargs): """Handle arguments were supported in V1.""" if kwargs.get('write_grads', False): @@ -1185,16 +1189,44 @@ class TensorBoard(Callback): """Sets Keras model and writes graph if specified.""" self.model = model with context.eager_mode(): - self.writer = summary_ops_v2.create_file_writer(self.log_dir) + self._initialize_writers() if self.write_graph: if model.run_eagerly: logging.warning('TensorBoard Callback will ignore `write_graph=True`' 'when `Model.run_eagerly=True`.`') else:
- with self.writer.as_default(): + with self._train_writer.as_default(): with summary_ops_v2.always_record_summaries(): summary_ops_v2.graph(K.get_graph()) + def _close_writers(self): + """Close all remaining open file writers owned by this callback. + + If there are no such file writers, this is a no-op. + """ + with context.eager_mode(): + for writer in self._writers: + writer.close() + del self._writers[:] + + def _initialize_writers(self): + """Create all file writers needed and validation writers. + + This updates `self._train_writer` and `self._validation_writer`, and + populates the `self._writers` list to be cleaned up by + `_close_writers`. + """ + self._close_writers() + + def create_writer(subdir): + path = os.path.join(self.log_dir, subdir) + return summary_ops_v2.create_file_writer(path) + + self._train_writer = create_writer('train') + self._writers.append(self._train_writer) + self._validation_writer = create_writer('validation') + self._writers.append(self._validation_writer) + def on_batch_end(self, batch, logs=None): """Writes scalar summaries for metrics on every training batch.""" # Don't output batch_size and batch number as TensorBoard summaries @@ -1215,8 +1247,7 @@ class TensorBoard(Callback): self._log_weights(epoch) def on_train_end(self, logs=None): - with context.eager_mode(): - self.writer.close() + self._close_writers() def _log_metrics(self, logs, prefix, step): """Writes metrics out as custom scalar summaries. @@ -1228,20 +1259,37 @@ class TensorBoard(Callback): """ if logs is None: logs = {} - # Scrub non-metric items and assign batch or epoch prefix. - metric_logs = {(prefix + k): v - for k, v in logs.items() - if k not in ['batch', 'size', 'num_steps']} - with context.eager_mode(), \ - self.writer.as_default(), \ - summary_ops_v2.always_record_summaries(): - for name, value in metric_logs.items(): - summary_ops_v2.scalar(name, value, step=step) + + # Group metrics by their associated file writer. 
Values are lists of + # metrics, as (name, scalar_value) pairs. + logs_by_writer = { + self._train_writer: [], + self._validation_writer: [], + } + validation_prefix = 'val_' + for (name, value) in logs.items(): + if name in ('batch', 'size', 'num_steps'): + # Scrub non-metric items. + continue + if name.startswith(validation_prefix): + name = name[len(validation_prefix):] + writer = self._validation_writer + else: + writer = self._train_writer + name = prefix + name # assign batch or epoch prefix + logs_by_writer[writer].append((name, value)) + + with context.eager_mode(): + with summary_ops_v2.always_record_summaries(): + for writer in logs_by_writer: + with writer.as_default(): + for (name, value) in logs_by_writer[writer]: + summary_ops_v2.scalar(name, value, step=step) def _log_weights(self, epoch): """Logs the weights of the Model to TensorBoard.""" with context.eager_mode(), \ - self.writer.as_default(), \ + self._train_writer.as_default(), \ summary_ops_v2.always_record_summaries(): for layer in self.model.layers: for weight in layer.weights: @@ -1251,7 +1299,7 @@ class TensorBoard(Callback): summary_ops_v2.histogram(weight_name, weight, step=epoch) if self.write_images: self._log_weight_as_image(weight, weight_name, epoch) - self.writer.flush() + self._train_writer.flush() def _log_weight_as_image(self, weight, weight_name, epoch): """Logs a weight as a TensorBoard image.""" diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index 4863e5ceac..1bd24aa19d 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -32,6 +32,7 @@ import numpy as np from tensorflow.python import keras from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.eager import context from tensorflow.python.framework import random_seed from tensorflow.python.keras import keras_parameterized from tensorflow.python.keras import testing_utils @@ -966,57 +967,80 @@ class 
KerasCallbacksTest(keras_parameterized.TestCase): epochs=1) -class _MockSummaryFile(object): - """Mocks a TensorBoard summary file, recording the tag names it sees.""" - - def __init__(self): - self.scalar_names = set() - self.hist_names = set() - self.image_names = set() - - -def _make_mock_scalar_summary(summary_file): - - def _mock_scalar_summary(name, *args, **kwargs): # pylint: disable=unused-argument - summary_file.scalar_names.update({name}) +# A summary that was emitted during a test. Fields: +# logdir: str. The logdir of the FileWriter to which the summary was +# written. +# tag: str. The name of the summary. +_ObservedSummary = collections.namedtuple('_ObservedSummary', ('logdir', 'tag')) - return _mock_scalar_summary +class _MockSummaryFile(object): + """Record summary tag names and the files to which they're written. -def _make_mock_hist_summary(summary_file): - - def _mock_hist_summary(name, *args, **kwargs): # pylint: disable=unused-argument - summary_file.hist_names.update({name}) - - return _mock_hist_summary - - -def _make_mock_image_summary(summary_file): - - def _mock_image_summary(name, *args, **kwargs): # pylint: disable=unused-argument - summary_file.image_names.update({name}) + Fields `scalars`, `images`, and `histograms` are sets containing + `_ObservedSummary` values. + """ - return _mock_image_summary + def __init__(self): + self.scalars = set() + self.images = set() + self.histograms = set() @tf_contextlib.contextmanager -def _mock_summary_api(summary_file): +def _mock_summary_api(): + summary_file = _MockSummaryFile() + + # Keep track of the logdir associated with each created resource. + # (There doesn't seem to be an easy way to get this information after + # the fact.) 
+ resource_logdirs = {} + real_create_file_writer = summary_ops_v2.create_file_writer + + def mock_create_file_writer(logdir, *args, **kwargs): + writer = real_create_file_writer(logdir, *args, **kwargs) + resource = writer._resource + assert resource is not None + assert resource not in resource_logdirs, (resource, resource_logdirs) + resource_logdirs[resource] = logdir + return writer + + def make_mock_summary(summary_set): + + def mock_summary(tag, *args, **kwargs): + del args # unused + del kwargs # unused + resource = context.context().summary_writer_resource + logdir = resource_logdirs[resource] + summary_set.add(_ObservedSummary(logdir=logdir, tag=tag)) + + return mock_summary + with test.mock.patch.object(summary_ops_v2, - 'scalar', - _make_mock_scalar_summary(summary_file)), \ + 'create_file_writer', + mock_create_file_writer), \ + test.mock.patch.object(summary_ops_v2, + 'scalar', + make_mock_summary(summary_file.scalars)), \ test.mock.patch.object(summary_ops_v2, 'histogram', - _make_mock_hist_summary(summary_file)), \ + make_mock_summary(summary_file.histograms)), \ test.mock.patch.object(summary_ops_v2, 'image', - _make_mock_image_summary(summary_file)): - yield + make_mock_summary(summary_file.images)): + yield summary_file @keras_parameterized.run_with_all_model_types @keras_parameterized.run_all_keras_modes(always_skip_v1=True) class TestTensorBoardV2(keras_parameterized.TestCase): + def setUp(self): + super(TestTensorBoardV2, self).setUp() + self.logdir = os.path.join(self.get_temp_dir(), 'tb') + self.train_dir = os.path.join(self.logdir, 'train') + self.validation_dir = os.path.join(self.logdir, 'validation') + def _get_model(self): layers = [ keras.layers.Conv2D(8, (3, 3)), @@ -1028,13 +1052,11 @@ class TestTensorBoardV2(keras_parameterized.TestCase): return model def test_TensorBoard_basic(self): - summary_file = _MockSummaryFile() model = self._get_model() x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - temp_dir = self.get_temp_dir() + 
'/tb' - tb_cbk = keras.callbacks.TensorBoard(temp_dir) + tb_cbk = keras.callbacks.TensorBoard(self.logdir) - with _mock_summary_api(summary_file): # pylint: disable=not-context-manager + with _mock_summary_api() as summary_file: model.fit( x, y, @@ -1043,17 +1065,18 @@ class TestTensorBoardV2(keras_parameterized.TestCase): validation_data=(x, y), callbacks=[tb_cbk]) - self.assertEqual(summary_file.scalar_names, - {'epoch_loss', 'epoch_val_loss'}) + self.assertEqual( + summary_file.scalars, { + _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), + _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'), + }) def test_TensorBoard_batch_metrics(self): - summary_file = _MockSummaryFile() model = self._get_model() x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) - temp_dir = self.get_temp_dir() + '/tb' - tb_cbk = keras.callbacks.TensorBoard(temp_dir, update_freq=1) + tb_cbk = keras.callbacks.TensorBoard(self.logdir, update_freq=1) - with _mock_summary_api(summary_file): # pylint: disable=not-context-manager + with _mock_summary_api() as summary_file: model.fit( x, y, @@ -1062,17 +1085,22 @@ class TestTensorBoardV2(keras_parameterized.TestCase): validation_data=(x, y), callbacks=[tb_cbk]) - self.assertEqual(summary_file.scalar_names, - {'batch_loss', 'epoch_loss', 'epoch_val_loss'}) + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag='batch_loss'), + _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), + _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'), + }, + ) def test_TensorBoard_weight_histograms(self): - summary_file = _MockSummaryFile() model = self._get_model() x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) temp_dir = self.get_temp_dir() + '/tb' tb_cbk = keras.callbacks.TensorBoard(temp_dir, histogram_freq=1) - with _mock_summary_api(summary_file): # pylint: disable=not-context-manager + with _mock_summary_api() as summary_file: model.fit( x, y, @@ -1081,24 +1109,29 @@ class 
TestTensorBoardV2(keras_parameterized.TestCase): validation_data=(x, y), callbacks=[tb_cbk]) - self.assertEqual(summary_file.scalar_names, - {'epoch_loss', 'epoch_val_loss'}) - - # Strip Layer names as Layers are created multiple times in test. - hist_names = { - name[name.rfind('/') + 1:] for name in summary_file.hist_names - } - self.assertEqual(hist_names, {'bias_0', 'kernel_0'}) + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), + _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'), + }, + ) + self.assertEqual( + self._strip_layer_names(summary_file.histograms), + { + _ObservedSummary(logdir=self.train_dir, tag='bias_0'), + _ObservedSummary(logdir=self.train_dir, tag='kernel_0'), + }, + ) def test_TensorBoard_weight_images(self): - summary_file = _MockSummaryFile() model = self._get_model() x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1)) temp_dir = self.get_temp_dir() + '/tb' tb_cbk = keras.callbacks.TensorBoard( temp_dir, histogram_freq=1, write_images=True) - with _mock_summary_api(summary_file): # pylint: disable=not-context-manager + with _mock_summary_api() as summary_file: model.fit( x, y, @@ -1107,19 +1140,39 @@ class TestTensorBoardV2(keras_parameterized.TestCase): validation_data=(x, y), callbacks=[tb_cbk]) - self.assertEqual(summary_file.scalar_names, - {'epoch_loss', 'epoch_val_loss'}) - - # Strip Layer names as Layers are created multiple times in test. 
- hist_names = { - name[name.rfind('/') + 1:] for name in summary_file.hist_names - } - self.assertEqual(hist_names, {'bias_0', 'kernel_0'}) - - image_names = { - name[name.rfind('/') + 1:] for name in summary_file.image_names - } - self.assertEqual(image_names, {'bias_0', 'kernel_0'}) + self.assertEqual( + summary_file.scalars, + { + _ObservedSummary(logdir=self.train_dir, tag='epoch_loss'), + _ObservedSummary(logdir=self.validation_dir, tag='epoch_loss'), + }, + ) + self.assertEqual( + self._strip_layer_names(summary_file.histograms), + { + _ObservedSummary(logdir=self.train_dir, tag='bias_0'), + _ObservedSummary(logdir=self.train_dir, tag='kernel_0'), + }, + ) + self.assertEqual( + self._strip_layer_names(summary_file.images), + { + _ObservedSummary(logdir=self.train_dir, tag='bias_0'), + _ObservedSummary(logdir=self.train_dir, tag='kernel_0'), + }, + ) + + def _strip_layer_names(self, summaries): + """Deduplicate summary names modulo layer suffix. + + Args: + summaries: A `set` of `_ObservedSummary` values. + + Returns: + A new `set` of `_ObservedSummary` values with layer suffixes + removed. + """ + return {s._replace(tag=s.tag[s.tag.rfind('/') + 1:]) for s in summaries} def test_TensorBoard_invalid_argument(self): with self.assertRaisesRegexp(ValueError, 'Unrecognized arguments'): -- GitLab From 15bf2dd1fd31a534581e20aa41a355e381ad4f84 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Fri, 15 Feb 2019 11:40:39 -0800 Subject: [PATCH 222/351] Make implementation_selector as core grappler optimizer. This is used by RNN API in 2.0. It will add small overhead during optimization stage, specifically: Read all the function signature for a special attribute. Skip the optimization if the attribute is not found. 
PiperOrigin-RevId: 234183947 --- .../grappler/optimizers/function_api_info.h | 2 ++ .../optimizers/implementation_selector.cc | 8 ++++++-- .../grappler/optimizers/meta_optimizer.cc | 19 +++++++------------ .../core/protobuf/rewriter_config.proto | 3 +++ tensorflow/python/eager/function_test.py | 10 +++------- .../python/keras/layers/unified_gru_test.py | 4 +--- .../python/keras/layers/unified_lstm_test.py | 4 +--- 7 files changed, 23 insertions(+), 27 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_api_info.h b/tensorflow/core/grappler/optimizers/function_api_info.h index 9a5f548951..ffa53a7d8d 100644 --- a/tensorflow/core/grappler/optimizers/function_api_info.h +++ b/tensorflow/core/grappler/optimizers/function_api_info.h @@ -80,6 +80,8 @@ class FunctionLibraryApiInfo { const string& function_name, std::vector* other_functions) const; const FunctionApiInfo* GetApiInfo(const string& function_name) const; + bool empty() const { return func_info_.empty(); } + std::size_t size() const { return func_info_.size(); } private: // Map between function name to function details. 
diff --git a/tensorflow/core/grappler/optimizers/implementation_selector.cc b/tensorflow/core/grappler/optimizers/implementation_selector.cc index f318569bd1..a370bf9934 100644 --- a/tensorflow/core/grappler/optimizers/implementation_selector.cc +++ b/tensorflow/core/grappler/optimizers/implementation_selector.cc @@ -176,6 +176,10 @@ Status ImplementationSelector::SelectImplementation( VLOG(2) << "Skipping graph since it does not have function def"; return Status::OK(); } + if (lib_info_->empty()) { + VLOG(2) << "Skipping optimization since lib_info is empty"; + return Status::OK(); + } for (int k = 0; k < graph->node_size(); ++k) TF_RETURN_IF_ERROR(MaybeOptimizeFunctionCall(graph->mutable_node(k))); @@ -184,8 +188,8 @@ Status ImplementationSelector::SelectImplementation( } Status ImplementationSelector::Optimize(Cluster* cluster, - const GrapplerItem& item, - GraphDef* optimized_graph) { + const GrapplerItem& item, + GraphDef* optimized_graph) { *optimized_graph = item.graph; TF_RETURN_IF_ERROR(LoadFunctions(*optimized_graph)); return SelectImplementation(optimized_graph); diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 8b21e58073..3f69f469e8 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -148,6 +148,9 @@ Status MetaOptimizer::InitializeOptimizers( if (!cfg_.disable_model_pruning()) { optimizers->push_back(MakeUnique()); } + if (cfg_.implementation_selector() == RewriterConfig::ON) { + optimizers->push_back(MakeUnique()); + } if (cfg_.function_optimization() != RewriterConfig::OFF) { optimizers->push_back( MakeUnique(cfg_.function_optimization())); @@ -241,18 +244,10 @@ Status MetaOptimizer::InitializeCustomGraphOptimizers( pre_initialized_optimizers.end()) { continue; } - // Initialize the ImplementationSelector here instead of - // CustomizeOptimizer registry, due the static link issue in TensorRT for 
- // double registry. - // TODO(laigd): Remove this hack and change it back to use the registry once - // the duplicate static import issue is fixed. - std::unique_ptr custom_optimizer; - if (optimizer_config.name() == "ImplementationSelector") { - custom_optimizer.reset(new ImplementationSelector()); - } else { - custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull( - optimizer_config.name()); - } + + auto custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull( + optimizer_config.name()); + if (custom_optimizer) { VLOG(2) << "Registered custom configurable graph optimizer: " << optimizer_config.name(); diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index b5c9599872..18fcb34eac 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -78,6 +78,9 @@ message RewriterConfig { Toggle scoped_allocator_optimization = 15; // Force small ops onto the CPU (default is ON). Toggle pin_to_host_optimization = 18; + // Enable the swap of kernel implementations based on the device placement + // (default is OFF). + Toggle implementation_selector = 22; // Disable the entire meta optimizer (off by default). bool disable_meta_optimizer = 19; diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 2e2e45cf7f..ab5f6c551e 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1972,14 +1972,10 @@ class FunctionTest(test.TestCase, parameterized.TestCase): graph_function('Not a Tensor.') def testSwapImplementationWithGrapplerPlugin(self): + # Set the min_graph_nodes to -1 since the graph in this test is too small, + # and will be ignored by grappler if don't set this. rewrites = rewriter_config_pb2.RewriterConfig() - # function_optimizer has to be turn off, otherwise it will delete the - # registered function if it does not get called. 
- # TODO(scottzhu): Move the ImplementationSelector to be called - # before function_optimizer in future. - rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF - customer_optimizer = rewrites.custom_optimizers.add() - customer_optimizer.name = 'ImplementationSelector' + rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON rewrites.min_graph_nodes = -1 graph_options = config_pb2.GraphOptions( rewrite_options=rewrites, build_cost_model=1) diff --git a/tensorflow/python/keras/layers/unified_gru_test.py b/tensorflow/python/keras/layers/unified_gru_test.py index b7e644920a..db86104238 100644 --- a/tensorflow/python/keras/layers/unified_gru_test.py +++ b/tensorflow/python/keras/layers/unified_gru_test.py @@ -47,9 +47,7 @@ from tensorflow.python.training import gradient_descent # Global config for grappler setting that is used for graph mode test. _rewrites = rewriter_config_pb2.RewriterConfig() -_rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF -_customer_optimizer = _rewrites.custom_optimizers.add() -_customer_optimizer.name = 'ImplementationSelector' +_rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON _rewrites.min_graph_nodes = -1 _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites) _config = config_pb2.ConfigProto(graph_options=_graph_options) diff --git a/tensorflow/python/keras/layers/unified_lstm_test.py b/tensorflow/python/keras/layers/unified_lstm_test.py index adc66ddd84..938c87c6b1 100644 --- a/tensorflow/python/keras/layers/unified_lstm_test.py +++ b/tensorflow/python/keras/layers/unified_lstm_test.py @@ -47,9 +47,7 @@ from tensorflow.python.training import gradient_descent # Global config for grappler setting that is used for graph mode test. 
_rewrites = rewriter_config_pb2.RewriterConfig() -_rewrites.function_optimization = rewriter_config_pb2.RewriterConfig.OFF -_customer_optimizer = _rewrites.custom_optimizers.add() -_customer_optimizer.name = 'ImplementationSelector' +_rewrites.implementation_selector = rewriter_config_pb2.RewriterConfig.ON _rewrites.min_graph_nodes = -1 _graph_options = config_pb2.GraphOptions(rewrite_options=_rewrites) _config = config_pb2.ConfigProto(graph_options=_graph_options) -- GitLab From ed7dfd6663d5171bc89e22ddafd985ee4d25a25a Mon Sep 17 00:00:00 2001 From: Tom Hennigan Date: Fri, 15 Feb 2019 11:54:29 -0800 Subject: [PATCH 223/351] Make tf.Module extend ABCMeta to make abstract subclasses trivial. ``` import abc class AbstractModule(tf.Module): @abc.abstractmethod def __call__(self, inputs, is_training=False): pass ``` You can do this more verbosely if you prefer: ``` import abc import six @six.add_metaclass(abc.ABCMeta) class AbstractModule(tf.Module): @abc.abstractmethod def __call__(self, inputs, is_training=False): pass ``` PiperOrigin-RevId: 234186396 --- tensorflow/python/module/module.py | 3 ++- tensorflow/python/module/module_test.py | 30 +++++++++++++++++++++ tensorflow/python/util/tf_decorator.py | 5 ++++ tensorflow/python/util/tf_decorator_test.py | 14 ++++++++++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/module/module.py b/tensorflow/python/module/module.py index 6e58bb2a70..77b578d13d 100644 --- a/tensorflow/python/module/module.py +++ b/tensorflow/python/module/module.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import abc import re import sys @@ -33,7 +34,7 @@ from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import tf_export -class ModuleMetaclass(type): +class ModuleMetaclass(abc.ABCMeta): """Metaclass for `tf.Module`.""" def __new__(mcs, name, bases, clsdict): diff --git 
a/tensorflow/python/module/module_test.py b/tensorflow/python/module/module_test.py index cd52954971..81a8a505fd 100644 --- a/tensorflow/python/module/module_test.py +++ b/tensorflow/python/module/module_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import abc import collections from absl.testing import parameterized @@ -252,6 +253,21 @@ class ForwardMethodsTest(test.TestCase): b"module_with_function_annotated_call/") +class AbcTest(test.TestCase): + + def testAbstract(self): + msg = "Can't instantiate .* abstract methods" + with self.assertRaisesRegexp(TypeError, msg): + AbstractModule() # pylint: disable=abstract-class-instantiated + + def testConcrete(self): + mod = ConcreteModule() + x, scope_name = mod(2.) + self.assertEqual(x, 4.) + self.assertEqual(scope_name, "concrete_module/") + self.assertEqual(get_name_scope(), "") + + def get_name_scope(): with ops.name_scope("x") as ns: return ns[:-2] @@ -283,6 +299,20 @@ class RecursiveModule(module.Module): self.w = variables.Variable(1.0, trainable=trainable, name="mushroom") +@six.add_metaclass(abc.ABCMeta) +class AbstractModule(module.Module): + + @abc.abstractmethod + def __call__(self, x): + pass + + +class ConcreteModule(AbstractModule): + + def __call__(self, x): + return x ** 2, get_name_scope() + + class TreeModule(module.Module): def __init__(self, name=None): diff --git a/tensorflow/python/util/tf_decorator.py b/tensorflow/python/util/tf_decorator.py index f018e1a1bd..21ed2d7efd 100644 --- a/tensorflow/python/util/tf_decorator.py +++ b/tensorflow/python/util/tf_decorator.py @@ -95,6 +95,11 @@ def make_decorator(target, decorator_func.__name__ = target.__name__ if hasattr(target, '__module__'): decorator_func.__module__ = target.__module__ + if hasattr(target, '__dict__'): + # Copy dict entries from target which are not overridden by decorator_func. 
+ for name in target.__dict__: + if name not in decorator_func.__dict__: + decorator_func.__dict__[name] = target.__dict__[name] if hasattr(target, '__doc__'): decorator_func.__doc__ = decorator.__doc__ decorator_func.__wrapped__ = target diff --git a/tensorflow/python/util/tf_decorator_test.py b/tensorflow/python/util/tf_decorator_test.py index 9198f0b3fa..cd5cdfb290 100644 --- a/tensorflow/python/util/tf_decorator_test.py +++ b/tensorflow/python/util/tf_decorator_test.py @@ -199,6 +199,20 @@ class TfMakeDecoratorTest(test.TestCase): decorator = getattr(decorated, '_tf_decorator') self.assertEqual('test decorator doc', decorator.decorator_doc) + def testUpdatesDictWithMissingEntries(self): + test_function.foobar = True + decorated = tf_decorator.make_decorator(test_function, test_wrapper) + self.assertTrue(decorated.foobar) + del test_function.foobar + + def testUpdatesDict_doesNotOverridePresentEntries(self): + test_function.foobar = True + test_wrapper.foobar = False + decorated = tf_decorator.make_decorator(test_function, test_wrapper) + self.assertFalse(decorated.foobar) + del test_function.foobar + del test_wrapper.foobar + def testSetsTFDecoratorArgSpec(self): argspec = tf_inspect.ArgSpec( args=['a', 'b', 'c'], -- GitLab From 5113410e10dbbf4af642be8b19c094c499773dfb Mon Sep 17 00:00:00 2001 From: Dan Moldovan Date: Fri, 15 Feb 2019 12:03:26 -0800 Subject: [PATCH 224/351] Fix bug causing continue statements to be lowered incorrectly. 
PiperOrigin-RevId: 234188071 --- .../converters/continue_statements.py | 52 ++++++--- .../converters/continue_statements_test.py | 110 +++++++++++++++--- 2 files changed, 126 insertions(+), 36 deletions(-) diff --git a/tensorflow/python/autograph/converters/continue_statements.py b/tensorflow/python/autograph/converters/continue_statements.py index 725e05360d..780f837fa3 100644 --- a/tensorflow/python/autograph/converters/continue_statements.py +++ b/tensorflow/python/autograph/converters/continue_statements.py @@ -29,11 +29,17 @@ class _Continue(object): def __init__(self): self.used = False self.control_var_name = None - self.create_guard = False - self.guard_created = False def __repr__(self): - return 'used: %s, var: %s' % (self.used, self.control_var_name) + return '<_Continue(used: {}, var: {})>'.format(self.used, + self.control_var_name) + + +class _Block(object): + + def __init__(self): + self.guard_created = False + self.create_guard = False class ContinueCanonicalizationTransformer(converter.Base): @@ -68,15 +74,15 @@ class ContinueCanonicalizationTransformer(converter.Base): # | # created if node) if self.state[_Continue].used: - if self.state[_Continue].guard_created: + if self.state[_Block].guard_created: return node, None - elif not self.state[_Continue].create_guard: - self.state[_Continue].create_guard = True + elif not self.state[_Block].create_guard: + self.state[_Block].create_guard = True return node, None else: - self.state[_Continue].guard_created = True + self.state[_Block].guard_created = True template = """ if ag__.not_(var_name): original_node @@ -90,6 +96,7 @@ class ContinueCanonicalizationTransformer(converter.Base): def _visit_loop_body(self, node, nodes): self.state[_Continue].enter() + self.state[_Block].enter() scope = anno.getanno(node, NodeAnno.BODY_SCOPE) continue_var = self.ctx.namer.new_symbol('continue_', scope.referenced) self.state[_Continue].control_var_name = continue_var @@ -103,14 +110,21 @@ class 
ContinueCanonicalizationTransformer(converter.Base): control_var_init = templates.replace(template, var_name=continue_var) nodes = control_var_init + nodes + self.state[_Block].exit() self.state[_Continue].exit() return nodes + def _visit_non_loop_body(self, nodes): + self.state[_Block].enter() + nodes = self.visit_block(nodes, after_visit=self._postprocess_statement) + self.state[_Block].exit() + return nodes + def visit_While(self, node): node.test = self.visit(node.test) node.body = self._visit_loop_body(node, node.body) # A continue in the else clause applies to the containing scope. - node.orelse = self.visit_block(node.orelse) + node.orelse = self._visit_non_loop_body(node.orelse) return node def visit_For(self, node): @@ -118,29 +132,29 @@ class ContinueCanonicalizationTransformer(converter.Base): node.iter = self.generic_visit(node.iter) node.body = self._visit_loop_body(node, node.body) # A continue in the else clause applies to the containing scope. - node.orelse = self.visit_block(node.orelse) + node.orelse = self._visit_non_loop_body(node.orelse) + return node + + def visit_If(self, node): + node.body = self.visit_block(node.body) + node.orelse = self._visit_non_loop_body(node.orelse) return node def visit_With(self, node): node.items = self.visit_block(node.items) - node.body = self.visit_block(node.body, - after_visit=self._postprocess_statement) + node.body = self._visit_non_loop_body(node.body) return node def visit_Try(self, node): - node.body = self.visit_block(node.body, - after_visit=self._postprocess_statement) - node.orelse = self.visit_block(node.orelse, - after_visit=self._postprocess_statement) + node.body = self._visit_non_loop_body(node.body) + node.orelse = self._visit_non_loop_body(node.orelse) # In Python 3.8 and later continue is allowed in finally blocks - node.finalbody = self.visit_block(node.finalbody, - after_visit=self._postprocess_statement) + node.finalbody = self._visit_non_loop_body(node.finalbody) node.handlers = 
self.visit_block(node.handlers) return node def visit_ExceptHandler(self, node): - node.body = self.visit_block(node.body, - after_visit=self._postprocess_statement) + node.body = self._visit_non_loop_body(node.body) return node diff --git a/tensorflow/python/autograph/converters/continue_statements_test.py b/tensorflow/python/autograph/converters/continue_statements_test.py index d6aaa50443..5a1828e318 100644 --- a/tensorflow/python/autograph/converters/continue_statements_test.py +++ b/tensorflow/python/autograph/converters/continue_statements_test.py @@ -20,15 +20,15 @@ from __future__ import print_function from tensorflow.python.autograph.converters import continue_statements from tensorflow.python.autograph.core import converter_testing -from tensorflow.python.eager import context as tfe_ctx from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops from tensorflow.python.platform import test class ContinueCanonicalizationTest(converter_testing.TestCase): def assertTransformedEquivalent(self, test_fn, *inputs): - with self.converted(test_fn, continue_statements, {}, + with self.converted(test_fn, continue_statements, {'ops': ops}, constant_op.constant) as result: self.assertEqual(test_fn(*inputs), result.test_fn(*inputs)) @@ -43,11 +43,10 @@ class ContinueCanonicalizationTest(converter_testing.TestCase): v.append(x) return v - with tfe_ctx.eager_mode(): - self.assertTransformedEquivalent(test_fn, 0) - self.assertTransformedEquivalent(test_fn, 1) - self.assertTransformedEquivalent(test_fn, 3) - self.assertTransformedEquivalent(test_fn, 4) + self.assertTransformedEquivalent(test_fn, 0) + self.assertTransformedEquivalent(test_fn, 1) + self.assertTransformedEquivalent(test_fn, 3) + self.assertTransformedEquivalent(test_fn, 4) def test_for_loop(self): @@ -60,11 +59,89 @@ class ContinueCanonicalizationTest(converter_testing.TestCase): v.append(x) return v - with tfe_ctx.eager_mode(): - self.assertTransformedEquivalent(test_fn, 
[]) - self.assertTransformedEquivalent(test_fn, [1]) - self.assertTransformedEquivalent(test_fn, [2]) - self.assertTransformedEquivalent(test_fn, [1, 2, 3]) + self.assertTransformedEquivalent(test_fn, []) + self.assertTransformedEquivalent(test_fn, [1]) + self.assertTransformedEquivalent(test_fn, [2]) + self.assertTransformedEquivalent(test_fn, [1, 2, 3]) + + def test_nested_with(self): + + def test_fn(x): + v = [] + while x > 0: + x -= 1 + with ops.name_scope(''): + if x % 2 == 0: + continue + v.append(x) + return v + + self.assertTransformedEquivalent(test_fn, 0) + self.assertTransformedEquivalent(test_fn, 1) + self.assertTransformedEquivalent(test_fn, 3) + self.assertTransformedEquivalent(test_fn, 4) + + def test_nested_multiple_withs(self): + + def test_fn(x): + v = [] + while x > 0: + x -= 1 + with ops.name_scope(''): + if x % 2 == 0: + continue + with ops.name_scope(''): + v.append(x) + v.append(x) + return v + + self.assertTransformedEquivalent(test_fn, 0) + self.assertTransformedEquivalent(test_fn, 1) + self.assertTransformedEquivalent(test_fn, 3) + self.assertTransformedEquivalent(test_fn, 4) + + def test_nested_multiple_withs_and_statements(self): + + def test_fn(x): + v = [] + while x > 0: + x -= 1 + with ops.name_scope(''): + if x % 2 == 0: + continue + v.append(x) + v.append(x) + with ops.name_scope(''): + v.append(x) + v.append(x) + return v + + self.assertTransformedEquivalent(test_fn, 0) + self.assertTransformedEquivalent(test_fn, 1) + self.assertTransformedEquivalent(test_fn, 3) + self.assertTransformedEquivalent(test_fn, 4) + + def test_nested_multiple_withs_and_nested_withs(self): + + def test_fn(x): + v = [] + while x > 0: + x -= 1 + with ops.name_scope(''): + if x % 2 == 0: + continue + with ops.name_scope(''): + v.append(x) + v.append(x) + with ops.name_scope(''): + v.append(x) + v.append(x) + return v + + self.assertTransformedEquivalent(test_fn, 0) + self.assertTransformedEquivalent(test_fn, 1) + self.assertTransformedEquivalent(test_fn, 3) 
+ self.assertTransformedEquivalent(test_fn, 4) def test_nested(self): @@ -83,11 +160,10 @@ class ContinueCanonicalizationTest(converter_testing.TestCase): v.append(x) return v, u, w - with tfe_ctx.eager_mode(): - self.assertTransformedEquivalent(test_fn, 0) - self.assertTransformedEquivalent(test_fn, 1) - self.assertTransformedEquivalent(test_fn, 3) - self.assertTransformedEquivalent(test_fn, 4) + self.assertTransformedEquivalent(test_fn, 0) + self.assertTransformedEquivalent(test_fn, 1) + self.assertTransformedEquivalent(test_fn, 3) + self.assertTransformedEquivalent(test_fn, 4) if __name__ == '__main__': -- GitLab From 398fce03079f77d0933f941cba3de080a3116859 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 15 Feb 2019 12:05:22 -0800 Subject: [PATCH 225/351] Add a Euclidean norm reduction kernel. This implements fused sqrt(reduce_sum(x * conj(x))) kernels for CPU (using Eigen) and GPU (using CUB), which is more efficient than the composite implementation at the TF level. It will also be easier to avoid the issue of producing NaNs in the gradient at the origin. Adds the tf.math.reduce_euclidean_norm() Python interface to call the fused reduction kernel directly. Gradients will be added in a follow-up change. 
PiperOrigin-RevId: 234188431 --- .../base_api/api_def_EuclideanNorm.pbtxt | 39 +++++++++ .../python_api/api_def_EuclideanNorm.pbtxt | 4 + .../core/kernels/reduction_gpu_kernels.cu.h | 39 +++++++++ tensorflow/core/kernels/reduction_ops.h | 39 +++++++++ .../core/kernels/reduction_ops_euclidean.cc | 81 +++++++++++++++++++ .../reduction_ops_gpu_complex128.cu.cc | 1 + .../kernels/reduction_ops_gpu_complex64.cu.cc | 1 + .../kernels/reduction_ops_gpu_double.cu.cc | 11 +-- .../kernels/reduction_ops_gpu_float.cu.cc | 11 +-- .../core/kernels/reduction_ops_gpu_int.cu.cc | 11 +-- .../kernels/reduction_ops_half_mean_sum.cu.cc | 5 +- tensorflow/core/kernels/reduction_ops_test.cc | 5 ++ tensorflow/core/ops/math_ops.cc | 9 +++ tensorflow/python/kernel_tests/BUILD | 2 +- .../python/kernel_tests/reduction_ops_test.py | 76 ++++++++++++++++- tensorflow/python/ops/linalg_ops.py | 2 + tensorflow/python/ops/math_grad.py | 4 + tensorflow/python/ops/math_ops.py | 41 ++++++++++ .../tools/api/golden/v1/tensorflow.math.pbtxt | 4 + .../api/golden/v1/tensorflow.raw_ops.pbtxt | 6 +- .../tools/api/golden/v2/tensorflow.math.pbtxt | 4 + .../api/golden/v2/tensorflow.raw_ops.pbtxt | 6 +- 22 files changed, 380 insertions(+), 21 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_EuclideanNorm.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_EuclideanNorm.pbtxt create mode 100644 tensorflow/core/kernels/reduction_ops_euclidean.cc diff --git a/tensorflow/core/api_def/base_api/api_def_EuclideanNorm.pbtxt b/tensorflow/core/api_def/base_api/api_def_EuclideanNorm.pbtxt new file mode 100644 index 0000000000..7d815b856b --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_EuclideanNorm.pbtxt @@ -0,0 +1,39 @@ +op { + graph_op_name: "EuclideanNorm" + endpoint { + name: "EuclideanNorm" + } + in_arg { + name: "input" + description: <